Commit 5584d9e6 authored by Alexei Starovoitov

Merge branch 'xdp: recycle Page Pool backed skbs built from XDP frames'

Alexander Lobakin says:

====================

Yeah, I still remember that "Who needs cpumap nowadays" (c), but anyway.

__xdp_build_skb_from_frame() missed the moment when the networking stack
became able to recycle skb pages backed by a page_pool. This made e.g.
cpumap redirect even less efficient than plain %XDP_PASS; veth was also
affected in some scenarios.
A lot of drivers already use skb_mark_for_recycle(); it has been around
for almost two years and there seem to be no issues with using it in the
generic code as well. {__,}xdp_release_frame() can then be removed, as it
has lost its last user.
Page Pool then becomes zero-alloc (or almost) in the cases mentioned
above, too. Other memory type models (who needs them at this point?) are
unchanged.
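
For reference, the driver-side pattern mentioned above looks roughly like
this (a minimal sketch only; the page_pool setup and driver specifics are
assumed, not taken from any particular driver):

	#include <linux/skbuff.h>
	#include <net/page_pool.h>

	/* Hypothetical Rx path: @data points into a page allocated from
	 * the driver's page_pool. Marking the skb lets the skb free path
	 * return such pages to the pool instead of the page allocator.
	 */
	static struct sk_buff *rx_build_skb(void *data, unsigned int frag_size)
	{
		struct sk_buff *skb;

		skb = build_skb(data, frag_size);
		if (!skb)
			return NULL;

		/* sets skb->pp_recycle, see the skbuff.h hunk below */
		skb_mark_for_recycle(skb);

		return skb;
	}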

Some numbers on 1 Xeon Platinum core bombed with 27 Mpps of 64-byte
IPv6 UDP, iavf w/XDP[0] (CONFIG_PAGE_POOL_STATS is enabled):

Plain %XDP_PASS on baseline, Page Pool driver:

src cpu Rx     drops  dst cpu Rx
  2.1 Mpps       N/A    2.1 Mpps

cpumap redirect (cross-core, w/o leaving its NUMA node) on baseline:

  6.8 Mpps  5.0 Mpps    1.8 Mpps

cpumap redirect with skb PP recycling:

  7.9 Mpps  5.7 Mpps    2.2 Mpps
                       +22% (from cpumap redir on baseline)

[0] https://github.com/alobakin/linux/commits/iavf-xdp
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 283b40c5 d4e49233
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -5069,12 +5069,12 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb)
 #endif
 }
 
-#ifdef CONFIG_PAGE_POOL
 static inline void skb_mark_for_recycle(struct sk_buff *skb)
 {
+#ifdef CONFIG_PAGE_POOL
 	skb->pp_recycle = 1;
-}
 #endif
+}
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SKBUFF_H */
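
The point of moving the #ifdef inside the body: the helper is now always
defined (an empty inline when !CONFIG_PAGE_POOL), so generic code can call
it without a guard of its own. This is what lets the net/core/xdp.c hunk
further below stay ifdef-free:

	/* No #ifdef CONFIG_PAGE_POOL needed at the call site; with the
	 * option disabled the helper is a no-op, and the mem type can't
	 * be MEM_TYPE_PAGE_POOL anyway.
	 */
	if (xdpf->mem.type == MEM_TYPE_PAGE_POOL)
		skb_mark_for_recycle(skb);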
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -317,35 +317,6 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq);
 void xdp_return_frame_bulk(struct xdp_frame *xdpf,
 			   struct xdp_frame_bulk *bq);
 
-/* When sending xdp_frame into the network stack, then there is no
- * return point callback, which is needed to release e.g. DMA-mapping
- * resources with page_pool. Thus, have explicit function to release
- * frame resources.
- */
-void __xdp_release_frame(void *data, struct xdp_mem_info *mem);
-static inline void xdp_release_frame(struct xdp_frame *xdpf)
-{
-	struct xdp_mem_info *mem = &xdpf->mem;
-	struct skb_shared_info *sinfo;
-	int i;
-
-	/* Curr only page_pool needs this */
-	if (mem->type != MEM_TYPE_PAGE_POOL)
-		return;
-
-	if (likely(!xdp_frame_has_frags(xdpf)))
-		goto out;
-
-	sinfo = xdp_get_shared_info_from_frame(xdpf);
-	for (i = 0; i < sinfo->nr_frags; i++) {
-		struct page *page = skb_frag_page(&sinfo->frags[i]);
-
-		__xdp_release_frame(page_address(page), mem);
-	}
-
-out:
-	__xdp_release_frame(xdpf->data, mem);
-}
-
 static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf)
 {
 	struct skb_shared_info *sinfo;
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -531,21 +531,6 @@ void xdp_return_buff(struct xdp_buff *xdp)
 }
 EXPORT_SYMBOL_GPL(xdp_return_buff);
 
-/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
-void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
-{
-	struct xdp_mem_allocator *xa;
-	struct page *page;
-
-	rcu_read_lock();
-	xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
-	page = virt_to_head_page(data);
-	if (xa)
-		page_pool_release_page(xa->page_pool, page);
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(__xdp_release_frame);
-
 void xdp_attachment_setup(struct xdp_attachment_info *info,
 			  struct netdev_bpf *bpf)
 {
@@ -658,8 +643,8 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
 	 * - RX ring dev queue index	(skb_record_rx_queue)
 	 */
 
-	/* Until page_pool get SKB return path, release DMA here */
-	xdp_release_frame(xdpf);
+	if (xdpf->mem.type == MEM_TYPE_PAGE_POOL)
+		skb_mark_for_recycle(skb);
 
 	/* Allow SKB to reuse area used by xdp_frame */
 	xdp_scrub_frame(xdpf);
--- a/tools/testing/selftests/bpf/progs/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/progs/xdp_do_redirect.c
@@ -4,6 +4,19 @@
 
 #define ETH_ALEN 6
 #define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
+
+/**
+ * enum frame_mark - magics to distinguish page/packet paths
+ * @MARK_XMIT: page was recycled due to the frame being "xmitted" by the NIC.
+ * @MARK_IN: frame is being processed by the input XDP prog.
+ * @MARK_SKB: frame did hit the TC ingress hook as an skb.
+ */
+enum frame_mark {
+	MARK_XMIT	= 0U,
+	MARK_IN		= 0x42,
+	MARK_SKB	= 0x45,
+};
+
 const volatile int ifindex_out;
 const volatile int ifindex_in;
 const volatile __u8 expect_dst[ETH_ALEN];
@@ -34,10 +47,10 @@ int xdp_redirect(struct xdp_md *xdp)
 	if (*metadata != 0x42)
 		return XDP_ABORTED;
 
-	if (*payload == 0) {
-		*payload = 0x42;
+	if (*payload == MARK_XMIT)
 		pkts_seen_zero++;
-	}
+
+	*payload = MARK_IN;
 
 	if (bpf_xdp_adjust_meta(xdp, 4))
 		return XDP_ABORTED;
@@ -51,7 +64,7 @@ int xdp_redirect(struct xdp_md *xdp)
 	return ret;
 }
 
-static bool check_pkt(void *data, void *data_end)
+static bool check_pkt(void *data, void *data_end, const __u32 mark)
 {
 	struct ipv6hdr *iph = data + sizeof(struct ethhdr);
 	__u8 *payload = data + HDR_SZ;
@@ -59,13 +72,13 @@ static bool check_pkt(void *data, void *data_end)
 	if (payload + 1 > data_end)
 		return false;
 
-	if (iph->nexthdr != IPPROTO_UDP || *payload != 0x42)
+	if (iph->nexthdr != IPPROTO_UDP || *payload != MARK_IN)
 		return false;
 
 	/* reset the payload so the same packet doesn't get counted twice when
 	 * it cycles back through the kernel path and out the dst veth
 	 */
-	*payload = 0;
+	*payload = mark;
+
 	return true;
 }
@@ -75,11 +88,11 @@ int xdp_count_pkts(struct xdp_md *xdp)
 	void *data = (void *)(long)xdp->data;
 	void *data_end = (void *)(long)xdp->data_end;
 
-	if (check_pkt(data, data_end))
+	if (check_pkt(data, data_end, MARK_XMIT))
 		pkts_seen_xdp++;
 
-	/* Return XDP_DROP to make sure the data page is recycled, like when it
-	 * exits a physical NIC. Recycled pages will be counted in the
-	 * pkts_seen_zero counter above.
+	/* Return %XDP_DROP to recycle the data page with %MARK_XMIT, like
+	 * it exited a physical NIC. Those pages will be counted in the
+	 * pkts_seen_zero counter above.
 	 */
 	return XDP_DROP;
@@ -91,9 +104,12 @@ int tc_count_pkts(struct __sk_buff *skb)
 	void *data = (void *)(long)skb->data;
 	void *data_end = (void *)(long)skb->data_end;
 
-	if (check_pkt(data, data_end))
+	if (check_pkt(data, data_end, MARK_SKB))
 		pkts_seen_tc++;
 
+	/* Will be either recycled or freed, %MARK_SKB makes sure it won't
+	 * hit any of the counters above.
+	 */
 	return 0;
 }
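
For completeness, a rough sketch of how the userspace half of the selftest
can use these counters to assert recycling. The skeleton names below are
assumed from the BPF object and its globals, not quoted from the actual
prog_tests code:

	#include <stdio.h>
	#include "xdp_do_redirect.skel.h"

	/* Hypothetical check: every page dropped by xdp_count_pkts() with
	 * MARK_XMIT should be recycled by the page_pool and show up again
	 * in xdp_redirect() as MARK_XMIT, bumping pkts_seen_zero. Without
	 * recycling, pkts_seen_zero stays (near) zero.
	 */
	static int check_recycling(void)
	{
		struct xdp_do_redirect *skel;

		skel = xdp_do_redirect__open_and_load();
		if (!skel)
			return -1;

		/* ... attach the progs and run the test frames here ... */

		if (skel->bss->pkts_seen_zero == 0)
			fprintf(stderr, "no page_pool recycling seen\n");

		xdp_do_redirect__destroy(skel);
		return 0;
	}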