Commit 193d0002 authored by Alexei Starovoitov

Merge branch 'bulk-cpumap-redirect'

Jesper Dangaard Brouer says:

====================
This patchset utilizes a number of different kernel bulk APIs to optimize the
performance of the XDP cpumap redirect feature.

Benchmark details are available here:
 https://github.com/xdp-project/xdp-project/blob/master/areas/cpumap/cpumap03-optimizations.org

The performance measurements can be considered micro-benchmarks, as they
measure packets being dropped at different stages of the network stack.
Summary based on the above:

Baseline benchmarks
- baseline-redirect: UdpNoPorts: 3,180,074
- baseline-redirect: iptables-raw drop: 6,193,534

Patch1: bpf: cpumap use ptr_ring_consume_batched
- redirect: UdpNoPorts: 3,327,729
- redirect: iptables-raw drop: 6,321,540

Patch2: net: core: introduce build_skb_around
- redirect: UdpNoPorts: 3,221,303
- redirect: iptables-raw drop: 6,320,066

Patch3: bpf: cpumap do bulk allocation of SKBs
- redirect: UdpNoPorts: 3,290,563
- redirect: iptables-raw drop: 6,650,112

Patch4: bpf: cpumap memory prefetchw optimizations for struct page
- redirect: UdpNoPorts: 3,520,250
- redirect: iptables-raw drop: 7,649,604
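
Relative to the baseline, the full series improves the UdpNoPorts case by
roughly 10.7% and the iptables-raw drop case by roughly 23.5%.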

In this V2 submission I have chosen to drop the SKB-list patch that used
netif_receive_skb_list(), as it did not show a performance improvement for
these micro-benchmarks.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 00967e84 86d23145
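
As an orientation aid before the combined diff, here is a condensed sketch of
how the bulk steps compose: batch-consume frames from the ptr_ring, prefetch
each frame's struct page, bulk-allocate zeroed sk_buff heads, then build every
SKB around a preallocated head. It is illustrative only and not code from the
series; the names xdp_bulk_sketch(), SKETCH_BATCH and SKETCH_FRAG_SIZE are
hypothetical, the ring is assumed to hold page-fragment backed packet buffers,
and the error, tracepoint and scheduling handling of the real kthread is
omitted.

/* Condensed, illustrative sketch of the batched flow the series applies. */
#include <linux/mm.h>
#include <linux/prefetch.h>
#include <linux/ptr_ring.h>
#include <linux/skbuff.h>
#include <linux/slab.h>

extern struct kmem_cache *skbuff_head_cache;	/* defined in net/core/skbuff.c */

#define SKETCH_BATCH	 8	/* mirrors CPUMAP_BATCH in the patches */
#define SKETCH_FRAG_SIZE 2048	/* full buffer size, incl. skb_shared_info tailroom */

/* @q is assumed to hold page-fragment backed buffers of SKETCH_FRAG_SIZE bytes */
static void xdp_bulk_sketch(struct ptr_ring *q)
{
	void *frames[SKETCH_BATCH];
	void *skbs[SKETCH_BATCH];
	int i, n, m;

	/* Patch 1: drain up to a whole batch from the ring in one call */
	n = ptr_ring_consume_batched(q, frames, SKETCH_BATCH);

	/* Patch 4: warm up each frame's struct page for the coming writes */
	for (i = 0; i < n; i++)
		prefetchw(virt_to_page(frames[i]));

	/* Patch 3: one slab call for up to n zeroed sk_buff heads */
	m = kmem_cache_alloc_bulk(skbuff_head_cache, __GFP_ZERO | GFP_ATOMIC,
				  n, skbs);

	/* build_skb_around() (introduced in Patch 2): wrap each buffer
	 * around a preallocated, cleared head.
	 */
	for (i = 0; i < m; i++) {
		struct sk_buff *skb;

		skb = build_skb_around(skbs[i], frames[i], SKETCH_FRAG_SIZE);
		if (likely(skb))
			kfree_skb(skb);	/* the real kthread feeds the stack here */
		else
			skb_free_frag(frames[i]);
	}

	/* Buffers left without an skb head (m < n) must still be released */
	for (; i < n; i++)
		skb_free_frag(frames[i]);
}

The real kthread (see kernel/bpf/cpumap.c below) additionally disables bottom
halves around the SKB-building loop and reports the batch size through the
xdp_cpumap_kthread tracepoint.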
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1042,6 +1042,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags,
 			    int node);
 struct sk_buff *__build_skb(void *data, unsigned int frag_size);
 struct sk_buff *build_skb(void *data, unsigned int frag_size);
+struct sk_buff *build_skb_around(struct sk_buff *skb,
+				 void *data, unsigned int frag_size);
 
 /**
  * alloc_skb - allocate a network buffer
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -160,12 +160,12 @@ static void cpu_map_kthread_stop(struct work_struct *work)
 }
 
 static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
-					 struct xdp_frame *xdpf)
+					 struct xdp_frame *xdpf,
+					 struct sk_buff *skb)
 {
 	unsigned int hard_start_headroom;
 	unsigned int frame_size;
 	void *pkt_data_start;
-	struct sk_buff *skb;
 
 	/* Part of headroom was reserved to xdpf */
 	hard_start_headroom = sizeof(struct xdp_frame) + xdpf->headroom;
@@ -191,8 +191,8 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
 		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	pkt_data_start = xdpf->data - hard_start_headroom;
-	skb = build_skb(pkt_data_start, frame_size);
-	if (!skb)
+	skb = build_skb_around(skb, pkt_data_start, frame_size);
+	if (unlikely(!skb))
 		return NULL;
 
 	skb_reserve(skb, hard_start_headroom);
@@ -240,6 +240,8 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
 	}
 }
 
+#define CPUMAP_BATCH 8
+
 static int cpu_map_kthread_run(void *data)
 {
 	struct bpf_cpu_map_entry *rcpu = data;
@@ -252,8 +254,11 @@ static int cpu_map_kthread_run(void *data)
 	 * kthread_stop signal until queue is empty.
 	 */
 	while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
-		unsigned int processed = 0, drops = 0, sched = 0;
-		struct xdp_frame *xdpf;
+		unsigned int drops = 0, sched = 0;
+		void *frames[CPUMAP_BATCH];
+		void *skbs[CPUMAP_BATCH];
+		gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
+		int i, n, m;
 
 		/* Release CPU reschedule checks */
 		if (__ptr_ring_empty(rcpu->queue)) {
@@ -269,18 +274,38 @@ static int cpu_map_kthread_run(void *data)
 			sched = cond_resched();
 		}
 
-		/* Process packets in rcpu->queue */
-		local_bh_disable();
 		/*
 		 * The bpf_cpu_map_entry is single consumer, with this
 		 * kthread CPU pinned. Lockless access to ptr_ring
 		 * consume side valid as no-resize allowed of queue.
 		 */
-		while ((xdpf = __ptr_ring_consume(rcpu->queue))) {
-			struct sk_buff *skb;
+		n = ptr_ring_consume_batched(rcpu->queue, frames, CPUMAP_BATCH);
+
+		for (i = 0; i < n; i++) {
+			void *f = frames[i];
+			struct page *page = virt_to_page(f);
+
+			/* Bring struct page memory area to curr CPU. Read by
+			 * build_skb_around via page_is_pfmemalloc(), and when
+			 * freed written by page_frag_free call.
+			 */
+			prefetchw(page);
+		}
+
+		m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, n, skbs);
+		if (unlikely(m == 0)) {
+			for (i = 0; i < n; i++)
+				skbs[i] = NULL; /* effect: xdp_return_frame */
+			drops = n;
+		}
+
+		local_bh_disable();
+		for (i = 0; i < n; i++) {
+			struct xdp_frame *xdpf = frames[i];
+			struct sk_buff *skb = skbs[i];
 			int ret;
 
-			skb = cpu_map_build_skb(rcpu, xdpf);
+			skb = cpu_map_build_skb(rcpu, xdpf, skb);
 			if (!skb) {
 				xdp_return_frame(xdpf);
 				continue;
@@ -290,13 +315,9 @@ static int cpu_map_kthread_run(void *data)
 			ret = netif_receive_skb_core(skb);
 			if (ret == NET_RX_DROP)
 				drops++;
-
-			/* Limit BH-disable period */
-			if (++processed == 8)
-				break;
 		}
 
 		/* Feedback loop via tracepoint */
-		trace_xdp_cpumap_kthread(rcpu->map_id, processed, drops, sched);
+		trace_xdp_cpumap_kthread(rcpu->map_id, n, drops, sched);
 
 		local_bh_enable(); /* resched point, may call do_softirq() */
 	}
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -258,6 +258,33 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 }
 EXPORT_SYMBOL(__alloc_skb);
 
+/* Caller must provide SKB that is memset cleared */
+static struct sk_buff *__build_skb_around(struct sk_buff *skb,
+					  void *data, unsigned int frag_size)
+{
+	struct skb_shared_info *shinfo;
+	unsigned int size = frag_size ? : ksize(data);
+
+	size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+	/* Assumes caller memset cleared SKB */
+	skb->truesize = SKB_TRUESIZE(size);
+	refcount_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb_reset_tail_pointer(skb);
+	skb->end = skb->tail + size;
+	skb->mac_header = (typeof(skb->mac_header))~0U;
+	skb->transport_header = (typeof(skb->transport_header))~0U;
+
+	/* make sure we initialize shinfo sequentially */
+	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
+	atomic_set(&shinfo->dataref, 1);
+
+	return skb;
+}
+
 /**
  * __build_skb - build a network buffer
  * @data: data buffer provided by caller
@@ -279,32 +306,15 @@ EXPORT_SYMBOL(__alloc_skb);
  */
 struct sk_buff *__build_skb(void *data, unsigned int frag_size)
 {
-	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
-	unsigned int size = frag_size ? : ksize(data);
 
 	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
-	if (!skb)
+	if (unlikely(!skb))
 		return NULL;
 
-	size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-
 	memset(skb, 0, offsetof(struct sk_buff, tail));
-	skb->truesize = SKB_TRUESIZE(size);
-	refcount_set(&skb->users, 1);
-	skb->head = data;
-	skb->data = data;
-	skb_reset_tail_pointer(skb);
-	skb->end = skb->tail + size;
-	skb->mac_header = (typeof(skb->mac_header))~0U;
-	skb->transport_header = (typeof(skb->transport_header))~0U;
 
-	/* make sure we initialize shinfo sequentially */
-	shinfo = skb_shinfo(skb);
-	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
-	atomic_set(&shinfo->dataref, 1);
-
-	return skb;
+	return __build_skb_around(skb, data, frag_size);
 }
 
 /* build_skb() is wrapper over __build_skb(), that specifically
@@ -325,6 +335,29 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 }
 EXPORT_SYMBOL(build_skb);
 
+/**
+ * build_skb_around - build a network buffer around provided skb
+ * @skb: sk_buff provide by caller, must be memset cleared
+ * @data: data buffer provided by caller
+ * @frag_size: size of data, or 0 if head was kmalloced
+ */
+struct sk_buff *build_skb_around(struct sk_buff *skb,
+				 void *data, unsigned int frag_size)
+{
+	if (unlikely(!skb))
+		return NULL;
+
+	skb = __build_skb_around(skb, data, frag_size);
+
+	if (skb && frag_size) {
+		skb->head_frag = 1;
+		if (page_is_pfmemalloc(virt_to_head_page(data)))
+			skb->pfmemalloc = 1;
+	}
+	return skb;
+}
+EXPORT_SYMBOL(build_skb_around);
+
 #define NAPI_SKB_CACHE_SIZE	64
 
 struct napi_alloc_cache {
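
The new export is easiest to read through its caller contract, sketched below
under stated assumptions (the helper name sketch_wrap_buffer() is
hypothetical): the sk_buff head is expected to come from skbuff_head_cache,
since kfree_skb() returns it there, and it must be cleared at least up to
offsetof(struct sk_buff, tail); a __GFP_ZERO bulk allocation, as in the cpumap
patch above, also satisfies this. With those preconditions the split form is
equivalent to a plain build_skb(data, frag_size).

#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/string.h>

extern struct kmem_cache *skbuff_head_cache;	/* defined in net/core/skbuff.c */

/* Equivalent to build_skb(data, frag_size), but with the head allocation
 * under the caller's control, so it can come from a bulk allocation instead.
 */
static struct sk_buff *sketch_wrap_buffer(void *data, unsigned int frag_size)
{
	struct sk_buff *skb;

	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
	if (unlikely(!skb))
		return NULL;

	/* build_skb_around() assumes a memset-cleared head */
	memset(skb, 0, offsetof(struct sk_buff, tail));

	return build_skb_around(skb, data, frag_size);
}

The point of the split is that the head allocation can now be amortized with
kmem_cache_alloc_bulk(), which is exactly what the cpumap kthread does.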