Commit ebf8c9aa authored by Yevgeny Petrilin, committed by David S. Miller

net/mlx4_en: Saving mem access on data path

Cached a pointer to pdev->dev in the private structure (priv->ddev) and
switched to calling dma_map directly instead of going through pci_map.
There are multiple map/unmap operations on the data path;
this change optimizes them by saving redundant pointer accesses.
Those places were identified as hot spots when running kernel profiling
during some benchmarks.
The fixes had the most impact when testing packet rate with small packets,
reducing CPU load by several percent, and in some cases being the difference
between reaching wire speed and being CPU bound.
Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 6975f4ce
...@@ -1062,6 +1062,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, ...@@ -1062,6 +1062,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
memset(priv, 0, sizeof(struct mlx4_en_priv)); memset(priv, 0, sizeof(struct mlx4_en_priv));
priv->dev = dev; priv->dev = dev;
priv->mdev = mdev; priv->mdev = mdev;
priv->ddev = &mdev->pdev->dev;
priv->prof = prof; priv->prof = prof;
priv->port = port; priv->port = port;
priv->port_up = false; priv->port_up = false;
......
...@@ -48,7 +48,6 @@ static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv, ...@@ -48,7 +48,6 @@ static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
struct mlx4_en_rx_alloc *ring_alloc, struct mlx4_en_rx_alloc *ring_alloc,
int i) int i)
{ {
struct mlx4_en_dev *mdev = priv->mdev;
struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i]; struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i];
struct page *page; struct page *page;
...@@ -72,7 +71,7 @@ static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv, ...@@ -72,7 +71,7 @@ static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
skb_frags[i].offset = page_alloc->offset; skb_frags[i].offset = page_alloc->offset;
page_alloc->offset += frag_info->frag_stride; page_alloc->offset += frag_info->frag_stride;
} }
dma = pci_map_single(mdev->pdev, page_address(skb_frags[i].page) + dma = dma_map_single(priv->ddev, page_address(skb_frags[i].page) +
skb_frags[i].offset, frag_info->frag_size, skb_frags[i].offset, frag_info->frag_size,
PCI_DMA_FROMDEVICE); PCI_DMA_FROMDEVICE);
rx_desc->data[i].addr = cpu_to_be64(dma); rx_desc->data[i].addr = cpu_to_be64(dma);
...@@ -186,7 +185,6 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, ...@@ -186,7 +185,6 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_ring *ring,
int index) int index)
{ {
struct mlx4_en_dev *mdev = priv->mdev;
struct page_frag *skb_frags; struct page_frag *skb_frags;
struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride); struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride);
dma_addr_t dma; dma_addr_t dma;
...@@ -198,7 +196,7 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, ...@@ -198,7 +196,7 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
dma = be64_to_cpu(rx_desc->data[nr].addr); dma = be64_to_cpu(rx_desc->data[nr].addr);
en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma); en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma);
pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size, dma_unmap_single(priv->ddev, dma, skb_frags[nr].size,
PCI_DMA_FROMDEVICE); PCI_DMA_FROMDEVICE);
put_page(skb_frags[nr].page); put_page(skb_frags[nr].page);
} }
...@@ -412,7 +410,6 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, ...@@ -412,7 +410,6 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
int length) int length)
{ {
struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags; struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags;
struct mlx4_en_dev *mdev = priv->mdev;
struct mlx4_en_frag_info *frag_info; struct mlx4_en_frag_info *frag_info;
int nr; int nr;
dma_addr_t dma; dma_addr_t dma;
...@@ -435,7 +432,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, ...@@ -435,7 +432,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
goto fail; goto fail;
/* Unmap buffer */ /* Unmap buffer */
pci_unmap_single(mdev->pdev, dma, skb_frag_size(&skb_frags_rx[nr]), dma_unmap_single(priv->ddev, dma, skb_frag_size(&skb_frags_rx[nr]),
PCI_DMA_FROMDEVICE); PCI_DMA_FROMDEVICE);
} }
/* Adjust size of last fragment to match actual length */ /* Adjust size of last fragment to match actual length */
...@@ -461,7 +458,6 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, ...@@ -461,7 +458,6 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
struct mlx4_en_rx_alloc *page_alloc, struct mlx4_en_rx_alloc *page_alloc,
unsigned int length) unsigned int length)
{ {
struct mlx4_en_dev *mdev = priv->mdev;
struct sk_buff *skb; struct sk_buff *skb;
void *va; void *va;
int used_frags; int used_frags;
...@@ -483,10 +479,10 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, ...@@ -483,10 +479,10 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
/* We are copying all relevant data to the skb - temporarily /* We are copying all relevant data to the skb - temporarily
* synch buffers for the copy */ * synch buffers for the copy */
dma = be64_to_cpu(rx_desc->data[0].addr); dma = be64_to_cpu(rx_desc->data[0].addr);
dma_sync_single_for_cpu(&mdev->pdev->dev, dma, length, dma_sync_single_for_cpu(priv->ddev, dma, length,
DMA_FROM_DEVICE); DMA_FROM_DEVICE);
skb_copy_to_linear_data(skb, va, length); skb_copy_to_linear_data(skb, va, length);
dma_sync_single_for_device(&mdev->pdev->dev, dma, length, dma_sync_single_for_device(priv->ddev, dma, length,
DMA_FROM_DEVICE); DMA_FROM_DEVICE);
skb->tail += length; skb->tail += length;
} else { } else {
......
...@@ -198,7 +198,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, ...@@ -198,7 +198,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring *ring, struct mlx4_en_tx_ring *ring,
int index, u8 owner) int index, u8 owner)
{ {
struct mlx4_en_dev *mdev = priv->mdev;
struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset; struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
...@@ -214,7 +213,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, ...@@ -214,7 +213,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
if (!tx_info->inl) { if (!tx_info->inl) {
if (tx_info->linear) { if (tx_info->linear) {
pci_unmap_single(mdev->pdev, dma_unmap_single(priv->ddev,
(dma_addr_t) be64_to_cpu(data->addr), (dma_addr_t) be64_to_cpu(data->addr),
be32_to_cpu(data->byte_count), be32_to_cpu(data->byte_count),
PCI_DMA_TODEVICE); PCI_DMA_TODEVICE);
...@@ -223,7 +222,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, ...@@ -223,7 +222,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
for (i = 0; i < frags; i++) { for (i = 0; i < frags; i++) {
frag = &skb_shinfo(skb)->frags[i]; frag = &skb_shinfo(skb)->frags[i];
pci_unmap_page(mdev->pdev, dma_unmap_page(priv->ddev,
(dma_addr_t) be64_to_cpu(data[i].addr), (dma_addr_t) be64_to_cpu(data[i].addr),
skb_frag_size(frag), PCI_DMA_TODEVICE); skb_frag_size(frag), PCI_DMA_TODEVICE);
} }
...@@ -241,7 +240,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, ...@@ -241,7 +240,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
} }
if (tx_info->linear) { if (tx_info->linear) {
pci_unmap_single(mdev->pdev, dma_unmap_single(priv->ddev,
(dma_addr_t) be64_to_cpu(data->addr), (dma_addr_t) be64_to_cpu(data->addr),
be32_to_cpu(data->byte_count), be32_to_cpu(data->byte_count),
PCI_DMA_TODEVICE); PCI_DMA_TODEVICE);
...@@ -253,7 +252,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, ...@@ -253,7 +252,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
if ((void *) data >= end) if ((void *) data >= end)
data = ring->buf; data = ring->buf;
frag = &skb_shinfo(skb)->frags[i]; frag = &skb_shinfo(skb)->frags[i];
pci_unmap_page(mdev->pdev, dma_unmap_page(priv->ddev,
(dma_addr_t) be64_to_cpu(data->addr), (dma_addr_t) be64_to_cpu(data->addr),
skb_frag_size(frag), PCI_DMA_TODEVICE); skb_frag_size(frag), PCI_DMA_TODEVICE);
++data; ++data;
...@@ -733,7 +732,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -733,7 +732,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
/* Map fragments */ /* Map fragments */
for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) { for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
frag = &skb_shinfo(skb)->frags[i]; frag = &skb_shinfo(skb)->frags[i];
dma = skb_frag_dma_map(&mdev->dev->pdev->dev, frag, dma = skb_frag_dma_map(priv->ddev, frag,
0, skb_frag_size(frag), 0, skb_frag_size(frag),
DMA_TO_DEVICE); DMA_TO_DEVICE);
data->addr = cpu_to_be64(dma); data->addr = cpu_to_be64(dma);
...@@ -745,7 +744,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -745,7 +744,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
/* Map linear part */ /* Map linear part */
if (tx_info->linear) { if (tx_info->linear) {
dma = pci_map_single(mdev->dev->pdev, skb->data + lso_header_size, dma = dma_map_single(priv->ddev, skb->data + lso_header_size,
skb_headlen(skb) - lso_header_size, PCI_DMA_TODEVICE); skb_headlen(skb) - lso_header_size, PCI_DMA_TODEVICE);
data->addr = cpu_to_be64(dma); data->addr = cpu_to_be64(dma);
data->lkey = cpu_to_be32(mdev->mr.key); data->lkey = cpu_to_be32(mdev->mr.key);
......
...@@ -482,6 +482,7 @@ struct mlx4_en_priv { ...@@ -482,6 +482,7 @@ struct mlx4_en_priv {
struct mlx4_en_stat_out_mbox hw_stats; struct mlx4_en_stat_out_mbox hw_stats;
int vids[128]; int vids[128];
bool wol; bool wol;
struct device *ddev;
}; };
enum mlx4_en_wol { enum mlx4_en_wol {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment