Commit b5a54d9a, authored by Eric Dumazet, committed by David S. Miller

mlx4: use order-0 pages for RX

Use of order-3 pages is problematic in some cases.

This patch might introduce three kinds of regression:

1) A CPU performance regression, but page recycling will be added
   later and performance should recover.

2) TCP receiver could grow its receive window slightly slower,
   because skb->len/skb->truesize ratio will decrease.
   This is mostly ok, we prefer being conservative to not risk OOM,
   and eventually tune TCP better in the future.
   This is consistent with other drivers using 2048 per ethernet frame.

3) Because we allocate one page per RX slot, we consume more
   memory for the ring buffers. XDP already had this constraint anyway.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 60c7f5ae
...@@ -53,38 +53,26 @@ ...@@ -53,38 +53,26 @@
static int mlx4_alloc_pages(struct mlx4_en_priv *priv, static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
struct mlx4_en_rx_alloc *page_alloc, struct mlx4_en_rx_alloc *page_alloc,
const struct mlx4_en_frag_info *frag_info, const struct mlx4_en_frag_info *frag_info,
gfp_t _gfp) gfp_t gfp)
{ {
int order;
struct page *page; struct page *page;
dma_addr_t dma; dma_addr_t dma;
for (order = priv->rx_page_order; ;) { page = alloc_page(gfp);
gfp_t gfp = _gfp; if (unlikely(!page))
if (order)
gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NOMEMALLOC;
page = alloc_pages(gfp, order);
if (likely(page))
break;
if (--order < 0 ||
((PAGE_SIZE << order) < frag_info->frag_size))
return -ENOMEM; return -ENOMEM;
} dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir);
dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
priv->dma_dir);
if (unlikely(dma_mapping_error(priv->ddev, dma))) { if (unlikely(dma_mapping_error(priv->ddev, dma))) {
put_page(page); put_page(page);
return -ENOMEM; return -ENOMEM;
} }
page_alloc->page_size = PAGE_SIZE << order;
page_alloc->page = page; page_alloc->page = page;
page_alloc->dma = dma; page_alloc->dma = dma;
page_alloc->page_offset = 0; page_alloc->page_offset = 0;
/* Not doing get_page() for each frag is a big win /* Not doing get_page() for each frag is a big win
* on asymetric workloads. Note we can not use atomic_set(). * on asymetric workloads. Note we can not use atomic_set().
*/ */
page_ref_add(page, page_alloc->page_size / frag_info->frag_stride - 1); page_ref_add(page, PAGE_SIZE / frag_info->frag_stride - 1);
return 0; return 0;
} }
...@@ -105,7 +93,7 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, ...@@ -105,7 +93,7 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
page_alloc[i].page_offset += frag_info->frag_stride; page_alloc[i].page_offset += frag_info->frag_stride;
if (page_alloc[i].page_offset + frag_info->frag_stride <= if (page_alloc[i].page_offset + frag_info->frag_stride <=
ring_alloc[i].page_size) PAGE_SIZE)
continue; continue;
if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i], if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i],
...@@ -127,11 +115,10 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, ...@@ -127,11 +115,10 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
while (i--) { while (i--) {
if (page_alloc[i].page != ring_alloc[i].page) { if (page_alloc[i].page != ring_alloc[i].page) {
dma_unmap_page(priv->ddev, page_alloc[i].dma, dma_unmap_page(priv->ddev, page_alloc[i].dma,
page_alloc[i].page_size, PAGE_SIZE, priv->dma_dir);
priv->dma_dir);
page = page_alloc[i].page; page = page_alloc[i].page;
/* Revert changes done by mlx4_alloc_pages */ /* Revert changes done by mlx4_alloc_pages */
page_ref_sub(page, page_alloc[i].page_size / page_ref_sub(page, PAGE_SIZE /
priv->frag_info[i].frag_stride - 1); priv->frag_info[i].frag_stride - 1);
put_page(page); put_page(page);
} }
...@@ -147,8 +134,8 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, ...@@ -147,8 +134,8 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride; u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride;
if (next_frag_end > frags[i].page_size) if (next_frag_end > PAGE_SIZE)
dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size, dma_unmap_page(priv->ddev, frags[i].dma, PAGE_SIZE,
priv->dma_dir); priv->dma_dir);
if (frags[i].page) if (frags[i].page)
...@@ -168,9 +155,8 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv, ...@@ -168,9 +155,8 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
frag_info, GFP_KERNEL | __GFP_COLD)) frag_info, GFP_KERNEL | __GFP_COLD))
goto out; goto out;
en_dbg(DRV, priv, " frag %d allocator: - size:%d frags:%d\n", en_dbg(DRV, priv, " frag %d allocator: - frags:%d\n",
i, ring->page_alloc[i].page_size, i, page_ref_count(ring->page_alloc[i].page));
page_ref_count(ring->page_alloc[i].page));
} }
return 0; return 0;
...@@ -180,11 +166,10 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv, ...@@ -180,11 +166,10 @@ static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
page_alloc = &ring->page_alloc[i]; page_alloc = &ring->page_alloc[i];
dma_unmap_page(priv->ddev, page_alloc->dma, dma_unmap_page(priv->ddev, page_alloc->dma,
page_alloc->page_size, PAGE_SIZE, priv->dma_dir);
priv->dma_dir);
page = page_alloc->page; page = page_alloc->page;
/* Revert changes done by mlx4_alloc_pages */ /* Revert changes done by mlx4_alloc_pages */
page_ref_sub(page, page_alloc->page_size / page_ref_sub(page, PAGE_SIZE /
priv->frag_info[i].frag_stride - 1); priv->frag_info[i].frag_stride - 1);
put_page(page); put_page(page);
page_alloc->page = NULL; page_alloc->page = NULL;
...@@ -206,9 +191,9 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, ...@@ -206,9 +191,9 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
i, page_count(page_alloc->page)); i, page_count(page_alloc->page));
dma_unmap_page(priv->ddev, page_alloc->dma, dma_unmap_page(priv->ddev, page_alloc->dma,
page_alloc->page_size, priv->dma_dir); PAGE_SIZE, priv->dma_dir);
while (page_alloc->page_offset + frag_info->frag_stride < while (page_alloc->page_offset + frag_info->frag_stride <
page_alloc->page_size) { PAGE_SIZE) {
put_page(page_alloc->page); put_page(page_alloc->page);
page_alloc->page_offset += frag_info->frag_stride; page_alloc->page_offset += frag_info->frag_stride;
} }
...@@ -1191,7 +1176,6 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) ...@@ -1191,7 +1176,6 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
* This only works when num_frags == 1. * This only works when num_frags == 1.
*/ */
if (priv->tx_ring_num[TX_XDP]) { if (priv->tx_ring_num[TX_XDP]) {
priv->rx_page_order = 0;
priv->frag_info[0].frag_size = eff_mtu; priv->frag_info[0].frag_size = eff_mtu;
/* This will gain efficient xdp frame recycling at the /* This will gain efficient xdp frame recycling at the
* expense of more costly truesize accounting * expense of more costly truesize accounting
...@@ -1201,22 +1185,32 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) ...@@ -1201,22 +1185,32 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
priv->rx_headroom = XDP_PACKET_HEADROOM; priv->rx_headroom = XDP_PACKET_HEADROOM;
i = 1; i = 1;
} else { } else {
int buf_size = 0; int frag_size_max = 2048, buf_size = 0;
/* should not happen, right ? */
if (eff_mtu > PAGE_SIZE + (MLX4_EN_MAX_RX_FRAGS - 1) * 2048)
frag_size_max = PAGE_SIZE;
while (buf_size < eff_mtu) { while (buf_size < eff_mtu) {
int frag_size = eff_mtu - buf_size; int frag_stride, frag_size = eff_mtu - buf_size;
int pad, nb;
if (i < MLX4_EN_MAX_RX_FRAGS - 1) if (i < MLX4_EN_MAX_RX_FRAGS - 1)
frag_size = min(frag_size, 2048); frag_size = min(frag_size, frag_size_max);
priv->frag_info[i].frag_size = frag_size; priv->frag_info[i].frag_size = frag_size;
frag_stride = ALIGN(frag_size, SMP_CACHE_BYTES);
/* We can only pack 2 1536-bytes frames in on 4K page
* Therefore, each frame would consume more bytes (truesize)
*/
nb = PAGE_SIZE / frag_stride;
pad = (PAGE_SIZE - nb * frag_stride) / nb;
pad &= ~(SMP_CACHE_BYTES - 1);
priv->frag_info[i].frag_stride = frag_stride + pad;
priv->frag_info[i].frag_stride = ALIGN(frag_size,
SMP_CACHE_BYTES);
buf_size += frag_size; buf_size += frag_size;
i++; i++;
} }
priv->rx_page_order = MLX4_EN_ALLOC_PREFER_ORDER;
priv->dma_dir = PCI_DMA_FROMDEVICE; priv->dma_dir = PCI_DMA_FROMDEVICE;
priv->rx_headroom = 0; priv->rx_headroom = 0;
} }
......
...@@ -102,9 +102,6 @@ ...@@ -102,9 +102,6 @@
/* Use the maximum between 16384 and a single page */ /* Use the maximum between 16384 and a single page */
#define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(16384) #define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(16384)
#define MLX4_EN_ALLOC_PREFER_ORDER min_t(int, get_order(32768), \
PAGE_ALLOC_COSTLY_ORDER)
#define MLX4_EN_MAX_RX_FRAGS 4 #define MLX4_EN_MAX_RX_FRAGS 4
/* Maximum ring sizes */ /* Maximum ring sizes */
...@@ -256,7 +253,6 @@ struct mlx4_en_rx_alloc { ...@@ -256,7 +253,6 @@ struct mlx4_en_rx_alloc {
struct page *page; struct page *page;
dma_addr_t dma; dma_addr_t dma;
u32 page_offset; u32 page_offset;
u32 page_size;
}; };
#define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT) #define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT)
...@@ -579,7 +575,6 @@ struct mlx4_en_priv { ...@@ -579,7 +575,6 @@ struct mlx4_en_priv {
u8 num_frags; u8 num_frags;
u8 log_rx_info; u8 log_rx_info;
u8 dma_dir; u8 dma_dir;
u8 rx_page_order;
u16 rx_headroom; u16 rx_headroom;
struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES]; struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment