Commit 4ef3960e authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'add-frag-page-support-in-page-pool'

Yunsheng Lin says:

====================
add frag page support in page pool

This patchset adds frag page support in page pool and
enable skb's page frag recycling based on page pool in
hns3 drvier.
====================

Link: https://lore.kernel.org/r/1628217982-53533-1-git-send-email-linyunsheng@huawei.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 2a2b6e36 93188e96
......@@ -91,6 +91,7 @@ config HNS3
tristate "Hisilicon Network Subsystem Support HNS3 (Framework)"
depends on PCI
select NET_DEVLINK
select PAGE_POOL
help
This selects the framework support for Hisilicon Network Subsystem 3.
This layer facilitates clients like ENET, RoCE and user-space ethernet
......
......@@ -3205,6 +3205,21 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring,
unsigned int order = hns3_page_order(ring);
struct page *p;
if (ring->page_pool) {
p = page_pool_dev_alloc_frag(ring->page_pool,
&cb->page_offset,
hns3_buf_size(ring));
if (unlikely(!p))
return -ENOMEM;
cb->priv = p;
cb->buf = page_address(p);
cb->dma = page_pool_get_dma_addr(p);
cb->type = DESC_TYPE_PP_FRAG;
cb->reuse_flag = 0;
return 0;
}
p = dev_alloc_pages(order);
if (!p)
return -ENOMEM;
......@@ -3227,8 +3242,13 @@ static void hns3_free_buffer(struct hns3_enet_ring *ring,
if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_HEAD |
DESC_TYPE_BOUNCE_ALL | DESC_TYPE_SGL_SKB))
napi_consume_skb(cb->priv, budget);
else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias)
else if (!HNAE3_IS_TX_RING(ring)) {
if (cb->type & DESC_TYPE_PAGE && cb->pagecnt_bias)
__page_frag_cache_drain(cb->priv, cb->pagecnt_bias);
else if (cb->type & DESC_TYPE_PP_FRAG)
page_pool_put_full_page(ring->page_pool, cb->priv,
false);
}
memset(cb, 0, sizeof(*cb));
}
......@@ -3315,7 +3335,7 @@ static int hns3_alloc_and_map_buffer(struct hns3_enet_ring *ring,
int ret;
ret = hns3_alloc_buffer(ring, cb);
if (ret)
if (ret || ring->page_pool)
goto out;
ret = hns3_map_buffer(ring, cb);
......@@ -3337,7 +3357,8 @@ static int hns3_alloc_and_attach_buffer(struct hns3_enet_ring *ring, int i)
if (ret)
return ret;
ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma +
ring->desc_cb[i].page_offset);
return 0;
}
......@@ -3367,7 +3388,8 @@ static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i,
{
hns3_unmap_buffer(ring, &ring->desc_cb[i]);
ring->desc_cb[i] = *res_cb;
ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma +
ring->desc_cb[i].page_offset);
ring->desc[i].rx.bd_base_info = 0;
}
......@@ -3539,6 +3561,12 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i,
u32 frag_size = size - pull_len;
bool reused;
if (ring->page_pool) {
skb_add_rx_frag(skb, i, desc_cb->priv, frag_offset,
frag_size, truesize);
return;
}
/* Avoid re-using remote or pfmem page */
if (unlikely(!dev_page_is_reusable(desc_cb->priv)))
goto out;
......@@ -3856,6 +3884,9 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
/* We can reuse buffer as-is, just make sure it is reusable */
if (dev_page_is_reusable(desc_cb->priv))
desc_cb->reuse_flag = 1;
else if (desc_cb->type & DESC_TYPE_PP_FRAG)
page_pool_put_full_page(ring->page_pool, desc_cb->priv,
false);
else /* This page cannot be reused so discard it */
__page_frag_cache_drain(desc_cb->priv,
desc_cb->pagecnt_bias);
......@@ -3863,6 +3894,10 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
hns3_rx_ring_move_fw(ring);
return 0;
}
if (ring->page_pool)
skb_mark_for_recycle(skb);
u64_stats_update_begin(&ring->syncp);
ring->stats.seg_pkt_cnt++;
u64_stats_update_end(&ring->syncp);
......@@ -3901,6 +3936,10 @@ static int hns3_add_frag(struct hns3_enet_ring *ring)
"alloc rx fraglist skb fail\n");
return -ENXIO;
}
if (ring->page_pool)
skb_mark_for_recycle(new_skb);
ring->frag_num = 0;
if (ring->tail_skb) {
......@@ -4705,6 +4744,29 @@ static void hns3_put_ring_config(struct hns3_nic_priv *priv)
priv->ring = NULL;
}
static void hns3_alloc_page_pool(struct hns3_enet_ring *ring)
{
struct page_pool_params pp_params = {
.flags = PP_FLAG_DMA_MAP | PP_FLAG_PAGE_FRAG |
PP_FLAG_DMA_SYNC_DEV,
.order = hns3_page_order(ring),
.pool_size = ring->desc_num * hns3_buf_size(ring) /
(PAGE_SIZE << hns3_page_order(ring)),
.nid = dev_to_node(ring_to_dev(ring)),
.dev = ring_to_dev(ring),
.dma_dir = DMA_FROM_DEVICE,
.offset = 0,
.max_len = PAGE_SIZE << hns3_page_order(ring),
};
ring->page_pool = page_pool_create(&pp_params);
if (IS_ERR(ring->page_pool)) {
dev_warn(ring_to_dev(ring), "page pool creation failed: %ld\n",
PTR_ERR(ring->page_pool));
ring->page_pool = NULL;
}
}
static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
{
int ret;
......@@ -4724,6 +4786,8 @@ static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring)
goto out_with_desc_cb;
if (!HNAE3_IS_TX_RING(ring)) {
hns3_alloc_page_pool(ring);
ret = hns3_alloc_ring_buffers(ring);
if (ret)
goto out_with_desc;
......@@ -4764,6 +4828,11 @@ void hns3_fini_ring(struct hns3_enet_ring *ring)
devm_kfree(ring_to_dev(ring), tx_spare);
ring->tx_spare = NULL;
}
if (!HNAE3_IS_TX_RING(ring) && ring->page_pool) {
page_pool_destroy(ring->page_pool);
ring->page_pool = NULL;
}
}
static int hns3_buf_size2type(u32 buf_size)
......
......@@ -6,6 +6,7 @@
#include <linux/dim.h>
#include <linux/if_vlan.h>
#include <net/page_pool.h>
#include "hnae3.h"
......@@ -307,6 +308,7 @@ enum hns3_desc_type {
DESC_TYPE_BOUNCE_ALL = 1 << 3,
DESC_TYPE_BOUNCE_HEAD = 1 << 4,
DESC_TYPE_SGL_SKB = 1 << 5,
DESC_TYPE_PP_FRAG = 1 << 6,
};
struct hns3_desc_cb {
......@@ -451,6 +453,7 @@ struct hns3_enet_ring {
struct hnae3_queue *tqp;
int queue_index;
struct device *dev; /* will be used for DMA mapping of descriptors */
struct page_pool *page_pool;
/* statistic */
struct ring_stats stats;
......
......@@ -2327,7 +2327,7 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
if (!skb)
return ERR_PTR(-ENOMEM);
skb_mark_for_recycle(skb, virt_to_page(xdp->data), pool);
skb_mark_for_recycle(skb);
skb_reserve(skb, xdp->data - xdp->data_hard_start);
skb_put(skb, xdp->data_end - xdp->data);
......@@ -2339,10 +2339,6 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool,
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
skb_frag_page(frag), skb_frag_off(frag),
skb_frag_size(frag), PAGE_SIZE);
/* We don't need to reset pp_recycle here. It's already set, so
* just mark fragments for recycling.
*/
page_pool_store_mem_info(skb_frag_page(frag), pool);
}
return skb;
......
......@@ -3995,7 +3995,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi,
}
if (pp)
skb_mark_for_recycle(skb, page, pp);
skb_mark_for_recycle(skb);
else
dma_unmap_single_attrs(dev->dev.parent, dma_addr,
bm_pool->buf_size, DMA_FROM_DEVICE,
......
......@@ -431,7 +431,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
skb->protocol = eth_type_trans(skb, ndev);
/* mark skb for recycling */
skb_mark_for_recycle(skb, page, pool);
skb_mark_for_recycle(skb);
netif_receive_skb(skb);
ndev->stats.rx_bytes += len;
......
......@@ -375,7 +375,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
skb->protocol = eth_type_trans(skb, ndev);
/* mark skb for recycling */
skb_mark_for_recycle(skb, page, pool);
skb_mark_for_recycle(skb);
netif_receive_skb(skb);
ndev->stats.rx_bytes += len;
......
......@@ -103,11 +103,19 @@ struct page {
unsigned long pp_magic;
struct page_pool *pp;
unsigned long _pp_mapping_pad;
unsigned long dma_addr;
union {
/**
* dma_addr_upper: might require a 64-bit
* value on 32-bit architectures.
*/
unsigned long dma_addr_upper;
/**
* @dma_addr: might require a 64-bit value on
* 32-bit architectures.
* For frag page support, not supported in
* 32-bit architectures with 64-bit DMA.
*/
unsigned long dma_addr[2];
atomic_long_t pp_frag_count;
};
};
struct { /* slab, slob and slub */
union {
......
......@@ -4712,11 +4712,9 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb)
}
#ifdef CONFIG_PAGE_POOL
static inline void skb_mark_for_recycle(struct sk_buff *skb, struct page *page,
struct page_pool *pp)
static inline void skb_mark_for_recycle(struct sk_buff *skb)
{
skb->pp_recycle = 1;
page_pool_store_mem_info(page, pp);
}
#endif
......
......@@ -45,7 +45,10 @@
* Please note DMA-sync-for-CPU is still
* device driver responsibility
*/
#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV)
#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */
#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\
PP_FLAG_DMA_SYNC_DEV |\
PP_FLAG_PAGE_FRAG)
/*
* Fast allocation side cache array/stack
......@@ -88,6 +91,9 @@ struct page_pool {
unsigned long defer_warn;
u32 pages_state_hold_cnt;
unsigned int frag_offset;
struct page *frag_page;
long frag_users;
/*
* Data structure for allocation side
......@@ -137,6 +143,18 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
return page_pool_alloc_pages(pool, gfp);
}
struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
unsigned int size, gfp_t gfp);
static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
unsigned int *offset,
unsigned int size)
{
gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
return page_pool_alloc_frag(pool, offset, size, gfp);
}
/* get the stored dma direction. A driver might decide to treat this locally and
* avoid the extra cache line from page_pool to determine the direction
*/
......@@ -198,19 +216,48 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
page_pool_put_full_page(pool, page, true);
}
#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \
(sizeof(dma_addr_t) > sizeof(unsigned long))
static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
{
dma_addr_t ret = page->dma_addr[0];
if (sizeof(dma_addr_t) > sizeof(unsigned long))
ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
dma_addr_t ret = page->dma_addr;
if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
return ret;
}
static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
{
page->dma_addr[0] = addr;
if (sizeof(dma_addr_t) > sizeof(unsigned long))
page->dma_addr[1] = upper_32_bits(addr);
page->dma_addr = addr;
if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
page->dma_addr_upper = upper_32_bits(addr);
}
static inline void page_pool_set_frag_count(struct page *page, long nr)
{
atomic_long_set(&page->pp_frag_count, nr);
}
static inline long page_pool_atomic_sub_frag_count_return(struct page *page,
long nr)
{
long ret;
/* As suggested by Alexander, atomic_long_read() may cover up the
* reference count errors, so avoid calling atomic_long_read() in
* the cases of freeing or draining the page_frags, where we would
* not expect it to match or that are slowpath anyway.
*/
if (__builtin_constant_p(nr) &&
atomic_long_read(&page->pp_frag_count) == nr)
return 0;
ret = atomic_long_sub_return(nr, &page->pp_frag_count);
WARN_ON(ret < 0);
return ret;
}
static inline bool is_page_pool_compiled_in(void)
......@@ -253,11 +300,4 @@ static inline void page_pool_ring_unlock(struct page_pool *pool)
spin_unlock_bh(&pool->ring.producer_lock);
}
/* Store mem_info on struct page and use it while recycling skb frags */
static inline
void page_pool_store_mem_info(struct page *page, struct page_pool *pp)
{
page->pp = pp;
}
#endif /* _NET_PAGE_POOL_H */
......@@ -24,6 +24,8 @@
#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)
#define BIAS_MAX LONG_MAX
static int page_pool_init(struct page_pool *pool,
const struct page_pool_params *params)
{
......@@ -67,6 +69,10 @@ static int page_pool_init(struct page_pool *pool,
*/
}
if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
pool->p.flags & PP_FLAG_PAGE_FRAG)
return -EINVAL;
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
return -ENOMEM;
......@@ -206,6 +212,19 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
return true;
}
static void page_pool_set_pp_info(struct page_pool *pool,
struct page *page)
{
page->pp = pool;
page->pp_magic |= PP_SIGNATURE;
}
static void page_pool_clear_pp_info(struct page *page)
{
page->pp_magic = 0;
page->pp = NULL;
}
static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
gfp_t gfp)
{
......@@ -222,7 +241,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
return NULL;
}
page->pp_magic |= PP_SIGNATURE;
page_pool_set_pp_info(pool, page);
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
......@@ -266,7 +285,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
put_page(page);
continue;
}
page->pp_magic |= PP_SIGNATURE;
page_pool_set_pp_info(pool, page);
pool->alloc.cache[pool->alloc.count++] = page;
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
......@@ -345,7 +365,7 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
DMA_ATTR_SKIP_CPU_SYNC);
page_pool_set_dma_addr(page, 0);
skip_dma_unmap:
page->pp_magic = 0;
page_pool_clear_pp_info(page);
/* This may be the last page returned, releasing the pool, so
* it is not safe to reference pool afterwards.
......@@ -405,6 +425,11 @@ static __always_inline struct page *
__page_pool_put_page(struct page_pool *pool, struct page *page,
unsigned int dma_sync_size, bool allow_direct)
{
/* It is not the last user for the page frag case */
if (pool->p.flags & PP_FLAG_PAGE_FRAG &&
page_pool_atomic_sub_frag_count_return(page, 1))
return NULL;
/* This allocator is optimized for the XDP mode that uses
* one-frame-per-page, but have fallbacks that act like the
* regular page allocator APIs.
......@@ -497,6 +522,84 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
}
EXPORT_SYMBOL(page_pool_put_page_bulk);
static struct page *page_pool_drain_frag(struct page_pool *pool,
struct page *page)
{
long drain_count = BIAS_MAX - pool->frag_users;
/* Some user is still using the page frag */
if (likely(page_pool_atomic_sub_frag_count_return(page,
drain_count)))
return NULL;
if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
page_pool_dma_sync_for_device(pool, page, -1);
return page;
}
page_pool_return_page(pool, page);
return NULL;
}
static void page_pool_free_frag(struct page_pool *pool)
{
long drain_count = BIAS_MAX - pool->frag_users;
struct page *page = pool->frag_page;
pool->frag_page = NULL;
if (!page ||
page_pool_atomic_sub_frag_count_return(page, drain_count))
return;
page_pool_return_page(pool, page);
}
struct page *page_pool_alloc_frag(struct page_pool *pool,
unsigned int *offset,
unsigned int size, gfp_t gfp)
{
unsigned int max_size = PAGE_SIZE << pool->p.order;
struct page *page = pool->frag_page;
if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
size > max_size))
return NULL;
size = ALIGN(size, dma_get_cache_alignment());
*offset = pool->frag_offset;
if (page && *offset + size > max_size) {
page = page_pool_drain_frag(pool, page);
if (page)
goto frag_reset;
}
if (!page) {
page = page_pool_alloc_pages(pool, gfp);
if (unlikely(!page)) {
pool->frag_page = NULL;
return NULL;
}
pool->frag_page = page;
frag_reset:
pool->frag_users = 1;
*offset = 0;
pool->frag_offset = size;
page_pool_set_frag_count(page, BIAS_MAX);
return page;
}
pool->frag_users++;
pool->frag_offset = *offset + size;
return page;
}
EXPORT_SYMBOL(page_pool_alloc_frag);
static void page_pool_empty_ring(struct page_pool *pool)
{
struct page *page;
......@@ -602,6 +705,8 @@ void page_pool_destroy(struct page_pool *pool)
if (!page_pool_put(pool))
return;
page_pool_free_frag(pool);
if (!page_pool_release(pool))
return;
......@@ -644,7 +749,6 @@ bool page_pool_return_skb_page(struct page *page)
* The page will be returned to the pool here regardless of the
* 'flipped' fragment being in use or not.
*/
page->pp = NULL;
page_pool_put_full_page(pp, page, false);
return true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment