Commit eb05529a authored by Jakub Kicinski

Merge branch 'page_pool-allow-direct-bulk-recycling'

Alexander Lobakin says:

====================
page_pool: allow direct bulk recycling

Previously, there was no reliable way to check whether it is safe to use
the direct PP cache. Drivers were passing @allow_direct to the PP
recycling functions and that was it. Bulk recycling is used by
xdp_return_frame_bulk() on .ndo_xdp_xmit() frame completion, where the
page origin is unknown, so direct recycling was never attempted there.
Now that we have at least two ways of checking whether we're allowed to
perform direct recycling -- pool->p.napi (Jakub) and pool->cpuid
(Lorenzo) -- we can use them when doing bulk recycling as well. Just
move that logic from the skb core to the PP core and call it before
__page_pool_put_page() every time @allow_direct is false.
Under high .ndo_xdp_xmit() traffic load, the win is 2-3% in Pps,
assuming the sending driver uses xdp_return_frame_bulk() on Tx
completion.
====================

Link: https://lore.kernel.org/r/20240329165507.3240110-1-aleksander.lobakin@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 8db2509f 39806b96
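
For context, below is a minimal, hedged sketch of the Tx-completion pattern the
cover letter refers to. Only the xdp_frame_bulk helpers (xdp_frame_bulk_init(),
xdp_return_frame_bulk(), xdp_flush_frame_bulk()) are existing kernel API; the
function name and the completed[]/n parameters are hypothetical stand-ins for a
driver's own descriptor-ring bookkeeping.

#include <linux/rcupdate.h>
#include <net/xdp.h>

/* Hypothetical driver helper: return a batch of transmitted
 * .ndo_xdp_xmit() frames through the bulk API. After this series,
 * page_pool_put_page_bulk() may recycle these pages straight into the
 * pool's direct (lockless) cache when this runs in the pool's NAPI/CPU
 * context, instead of always going through the ptr_ring.
 */
static void my_xdp_tx_complete(struct xdp_frame **completed, unsigned int n)
{
        struct xdp_frame_bulk bq;
        unsigned int i;

        xdp_frame_bulk_init(&bq);

        /* The bulk return path must run under RCU. */
        rcu_read_lock();

        /* Frames are queued up to XDP_BULK_QUEUE_SIZE at a time and then
         * flushed into page_pool_put_page_bulk() for page_pool-backed frames.
         */
        for (i = 0; i < n; i++)
                xdp_return_frame_bulk(completed[i], &bq);

        xdp_flush_frame_bulk(&bq);

        rcu_read_unlock();
}

With the pool->p.napi / pool->cpuid check now done inside the PP core (the new
page_pool_napi_local()), such a completion path no longer needs to pass any
napi_safe hint to benefit from direct recycling.
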
@@ -3510,25 +3510,25 @@ int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
                      unsigned int headroom);
 int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
                          struct bpf_prog *prog);
-bool napi_pp_put_page(struct page *page, bool napi_safe);
+bool napi_pp_put_page(struct page *page);
 
 static inline void
-skb_page_unref(const struct sk_buff *skb, struct page *page, bool napi_safe)
+skb_page_unref(const struct sk_buff *skb, struct page *page)
 {
 #ifdef CONFIG_PAGE_POOL
-        if (skb->pp_recycle && napi_pp_put_page(page, napi_safe))
+        if (skb->pp_recycle && napi_pp_put_page(page))
                 return;
 #endif
         put_page(page);
 }
 
 static inline void
-napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
+napi_frag_unref(skb_frag_t *frag, bool recycle)
 {
         struct page *page = skb_frag_page(frag);
 
 #ifdef CONFIG_PAGE_POOL
-        if (recycle && napi_pp_put_page(page, napi_safe))
+        if (recycle && napi_pp_put_page(page))
                 return;
 #endif
         put_page(page);
@@ -3544,7 +3544,7 @@ napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
  */
 static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
 {
-        napi_frag_unref(frag, recycle, false);
+        napi_frag_unref(frag, recycle);
 }
 
 /**
......
@@ -690,8 +690,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
                 page_pool_dma_sync_for_device(pool, page,
                                               dma_sync_size);
 
-        if (allow_direct && in_softirq() &&
-            page_pool_recycle_in_cache(page, pool))
+        if (allow_direct && page_pool_recycle_in_cache(page, pool))
                 return NULL;
 
         /* Page found as candidate for recycling */
@@ -716,9 +715,35 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
         return NULL;
 }
 
+static bool page_pool_napi_local(const struct page_pool *pool)
+{
+        const struct napi_struct *napi;
+        u32 cpuid;
+
+        if (unlikely(!in_softirq()))
+                return false;
+
+        /* Allow direct recycle if we have reasons to believe that we are
+         * in the same context as the consumer would run, so there's
+         * no possible race.
+         * __page_pool_put_page() makes sure we're not in hardirq context
+         * and interrupts are enabled prior to accessing the cache.
+         */
+        cpuid = smp_processor_id();
+        if (READ_ONCE(pool->cpuid) == cpuid)
+                return true;
+
+        napi = READ_ONCE(pool->p.napi);
+
+        return napi && READ_ONCE(napi->list_owner) == cpuid;
+}
+
 void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
                                 unsigned int dma_sync_size, bool allow_direct)
 {
+        if (!allow_direct)
+                allow_direct = page_pool_napi_local(pool);
+
         page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
         if (page && !page_pool_recycle_in_ring(pool, page)) {
                 /* Cache full, fallback to free pages */
@@ -747,8 +772,11 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
                              int count)
 {
         int i, bulk_len = 0;
+        bool allow_direct;
         bool in_softirq;
 
+        allow_direct = page_pool_napi_local(pool);
+
         for (i = 0; i < count; i++) {
                 struct page *page = virt_to_head_page(data[i]);
@@ -756,13 +784,13 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
                 if (!page_pool_is_last_ref(page))
                         continue;
 
-                page = __page_pool_put_page(pool, page, -1, false);
+                page = __page_pool_put_page(pool, page, -1, allow_direct);
                 /* Approved for bulk recycling in ptr_ring cache */
                 if (page)
                         data[bulk_len++] = page;
         }
 
-        if (unlikely(!bulk_len))
+        if (!bulk_len)
                 return;
 
         /* Bulk producer into ptr_ring page_pool cache */
@@ -969,7 +997,7 @@ void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *),
 static void page_pool_disable_direct_recycling(struct page_pool *pool)
 {
         /* Disable direct recycling based on pool->cpuid.
-         * Paired with READ_ONCE() in napi_pp_put_page().
+         * Paired with READ_ONCE() in page_pool_napi_local().
          */
         WRITE_ONCE(pool->cpuid, -1);
......
@@ -1004,11 +1004,8 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
 EXPORT_SYMBOL(skb_cow_data_for_xdp);
 
 #if IS_ENABLED(CONFIG_PAGE_POOL)
-bool napi_pp_put_page(struct page *page, bool napi_safe)
+bool napi_pp_put_page(struct page *page)
 {
-        bool allow_direct = false;
-        struct page_pool *pp;
-
         page = compound_head(page);
 
         /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
@@ -1021,39 +1018,18 @@ bool napi_pp_put_page(struct page *page, bool napi_safe)
         if (unlikely(!is_pp_page(page)))
                 return false;
 
-        pp = page->pp;
-
-        /* Allow direct recycle if we have reasons to believe that we are
-         * in the same context as the consumer would run, so there's
-         * no possible race.
-         * __page_pool_put_page() makes sure we're not in hardirq context
-         * and interrupts are enabled prior to accessing the cache.
-         */
-        if (napi_safe || in_softirq()) {
-                const struct napi_struct *napi = READ_ONCE(pp->p.napi);
-                unsigned int cpuid = smp_processor_id();
-
-                allow_direct = napi && READ_ONCE(napi->list_owner) == cpuid;
-                allow_direct |= READ_ONCE(pp->cpuid) == cpuid;
-        }
-
-        /* Driver set this to memory recycling info. Reset it on recycle.
-         * This will *not* work for NIC using a split-page memory model.
-         * The page will be returned to the pool here regardless of the
-         * 'flipped' fragment being in use or not.
-         */
-        page_pool_put_full_page(pp, page, allow_direct);
+        page_pool_put_full_page(page->pp, page, false);
 
         return true;
 }
 EXPORT_SYMBOL(napi_pp_put_page);
 #endif
 
-static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
+static bool skb_pp_recycle(struct sk_buff *skb, void *data)
 {
         if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
                 return false;
-        return napi_pp_put_page(virt_to_page(data), napi_safe);
+        return napi_pp_put_page(virt_to_page(data));
 }
 
 /**
@@ -1095,12 +1071,12 @@ static void skb_kfree_head(void *head, unsigned int end_offset)
                 kfree(head);
 }
 
-static void skb_free_head(struct sk_buff *skb, bool napi_safe)
+static void skb_free_head(struct sk_buff *skb)
 {
         unsigned char *head = skb->head;
 
         if (skb->head_frag) {
-                if (skb_pp_recycle(skb, head, napi_safe))
+                if (skb_pp_recycle(skb, head))
                         return;
                 skb_free_frag(head);
         } else {
@@ -1108,8 +1084,7 @@ static void skb_free_head(struct sk_buff *skb, bool napi_safe)
         }
 }
 
-static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason,
-                             bool napi_safe)
+static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason)
 {
         struct skb_shared_info *shinfo = skb_shinfo(skb);
         int i;
@@ -1126,13 +1101,13 @@ static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason,
         }
 
         for (i = 0; i < shinfo->nr_frags; i++)
-                napi_frag_unref(&shinfo->frags[i], skb->pp_recycle, napi_safe);
+                napi_frag_unref(&shinfo->frags[i], skb->pp_recycle);
 
 free_head:
         if (shinfo->frag_list)
                 kfree_skb_list_reason(shinfo->frag_list, reason);
 
-        skb_free_head(skb, napi_safe);
+        skb_free_head(skb);
 exit:
         /* When we clone an SKB we copy the reycling bit. The pp_recycle
          * bit is only set on the head though, so in order to avoid races
@@ -1193,12 +1168,11 @@ void skb_release_head_state(struct sk_buff *skb)
 }
 
 /* Free everything but the sk_buff shell. */
-static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason,
-                            bool napi_safe)
+static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason)
 {
         skb_release_head_state(skb);
         if (likely(skb->head))
-                skb_release_data(skb, reason, napi_safe);
+                skb_release_data(skb, reason);
 }
 
 /**
@@ -1212,7 +1186,7 @@ static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason,
 void __kfree_skb(struct sk_buff *skb)
 {
-        skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED, false);
+        skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED);
         kfree_skbmem(skb);
 }
 EXPORT_SYMBOL(__kfree_skb);
@@ -1269,7 +1243,7 @@ static void kfree_skb_add_bulk(struct sk_buff *skb,
                 return;
         }
 
-        skb_release_all(skb, reason, false);
+        skb_release_all(skb, reason);
         sa->skb_array[sa->skb_count++] = skb;
 
         if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) {
@@ -1443,7 +1417,7 @@ EXPORT_SYMBOL(consume_skb);
 void __consume_stateless_skb(struct sk_buff *skb)
 {
         trace_consume_skb(skb, __builtin_return_address(0));
-        skb_release_data(skb, SKB_CONSUMED, false);
+        skb_release_data(skb, SKB_CONSUMED);
         kfree_skbmem(skb);
 }
@@ -1470,7 +1444,7 @@ static void napi_skb_cache_put(struct sk_buff *skb)
 void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason)
 {
-        skb_release_all(skb, reason, true);
+        skb_release_all(skb, reason);
         napi_skb_cache_put(skb);
 }
@@ -1508,7 +1482,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
                 return;
         }
 
-        skb_release_all(skb, SKB_CONSUMED, !!budget);
+        skb_release_all(skb, SKB_CONSUMED);
         napi_skb_cache_put(skb);
 }
 EXPORT_SYMBOL(napi_consume_skb);
@@ -1639,7 +1613,7 @@ EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
  */
 struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 {
-        skb_release_all(dst, SKB_CONSUMED, false);
+        skb_release_all(dst, SKB_CONSUMED);
         return __skb_clone(dst, src);
 }
 EXPORT_SYMBOL_GPL(skb_morph);
@@ -2271,9 +2245,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
                 if (skb_has_frag_list(skb))
                         skb_clone_fraglist(skb);
 
-                skb_release_data(skb, SKB_CONSUMED, false);
+                skb_release_data(skb, SKB_CONSUMED);
         } else {
-                skb_free_head(skb, false);
+                skb_free_head(skb);
         }
         off = (data + nhead) - skb->head;
@@ -6574,12 +6548,12 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
                         skb_frag_ref(skb, i);
                 if (skb_has_frag_list(skb))
                         skb_clone_fraglist(skb);
-                skb_release_data(skb, SKB_CONSUMED, false);
+                skb_release_data(skb, SKB_CONSUMED);
         } else {
                 /* we can reuse existing recount- all we did was
                  * relocate values
                  */
-                skb_free_head(skb, false);
+                skb_free_head(skb);
         }
         skb->head = data;
@@ -6714,7 +6688,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
                 skb_kfree_head(data, size);
                 return -ENOMEM;
         }
-        skb_release_data(skb, SKB_CONSUMED, false);
+        skb_release_data(skb, SKB_CONSUMED);
 
         skb->head = data;
         skb->head_frag = 0;
......
@@ -114,7 +114,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
          */
         if (req->src != req->dst)
                 for (sg = sg_next(req->src); sg; sg = sg_next(sg))
-                        skb_page_unref(skb, sg_page(sg), false);
+                        skb_page_unref(skb, sg_page(sg));
 }
 
 #ifdef CONFIG_INET_ESPINTCP
......
@@ -131,7 +131,7 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp, struct sk_buff *skb)
          */
         if (req->src != req->dst)
                 for (sg = sg_next(req->src); sg; sg = sg_next(sg))
-                        skb_page_unref(skb, sg_page(sg), false);
+                        skb_page_unref(skb, sg_page(sg));
 }
 
 #ifdef CONFIG_INET6_ESPINTCP
......