Commit ce27ec60 authored by Eric Dumazet, committed by David S. Miller

net: add high_order_alloc_disable sysctl/static key

From linux-3.7 (commit 5640f768 "net: use a per task frag
allocator"), TCP sendmsg() has preferred using order-3 allocations.

While it gives good results for most cases, we had reports
that heavy uses of TCP over loopback were hitting a spinlock
contention in page allocations/freeing.

This commit adds a sysctl so that admins can opt in
for order-0 allocations. Hopefully the mm layer might optimize
order-3 allocations in the future, since it could give us
a nice boost (see the first 8 lines of the following benchmark).

The following benchmark shows a win when more than 8 TCP_STREAM
threads are running (56 x86 cores server in my tests)

for thr in {1..30}
do
 sysctl -wq net.core.high_order_alloc_disable=0
 T0=`./super_netperf $thr -H 127.0.0.1 -l 15`
 sysctl -wq net.core.high_order_alloc_disable=1
 T1=`./super_netperf $thr -H 127.0.0.1 -l 15`
 echo $thr:$T0:$T1
done

1: 49979: 37267
2: 98745: 76286
3: 141088: 110051
4: 177414: 144772
5: 197587: 173563
6: 215377: 208448
7: 241061: 234087
8: 267155: 263373
9: 295069: 297402
10: 312393: 335213
11: 340462: 368778
12: 371366: 403954
13: 412344: 443713
14: 426617: 473580
15: 474418: 507861
16: 503261: 538539
17: 522331: 563096
18: 532409: 567084
19: 550824: 605240
20: 525493: 641988
21: 564574: 665843
22: 567349: 690868
23: 583846: 710917
24: 588715: 736306
25: 603212: 763494
26: 604083: 792654
27: 602241: 796450
28: 604291: 797993
29: 611610: 833249
30: 577356: 841062
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 0b7d7f6b
...@@ -2534,6 +2534,8 @@ extern int sysctl_optmem_max; ...@@ -2534,6 +2534,8 @@ extern int sysctl_optmem_max;
extern __u32 sysctl_wmem_default; extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default; extern __u32 sysctl_rmem_default;
DECLARE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
{ {
/* Does this proto have per netns sysctl_wmem ? */ /* Does this proto have per netns sysctl_wmem ? */
......
...@@ -2320,6 +2320,7 @@ static void sk_leave_memory_pressure(struct sock *sk) ...@@ -2320,6 +2320,7 @@ static void sk_leave_memory_pressure(struct sock *sk)
/* On 32bit arches, an skb frag is limited to 2^15 */ /* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER get_order(32768) #define SKB_FRAG_PAGE_ORDER get_order(32768)
DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
/** /**
* skb_page_frag_refill - check that a page_frag contains enough room * skb_page_frag_refill - check that a page_frag contains enough room
...@@ -2344,7 +2345,8 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp) ...@@ -2344,7 +2345,8 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
} }
pfrag->offset = 0; pfrag->offset = 0;
if (SKB_FRAG_PAGE_ORDER) { if (SKB_FRAG_PAGE_ORDER &&
!static_branch_unlikely(&net_high_order_alloc_disable_key)) {
/* Avoid direct reclaim but allow kswapd to wake */ /* Avoid direct reclaim but allow kswapd to wake */
pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) | pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
__GFP_COMP | __GFP_NOWARN | __GFP_COMP | __GFP_NOWARN |
......
...@@ -562,6 +562,13 @@ static struct ctl_table net_core_table[] = { ...@@ -562,6 +562,13 @@ static struct ctl_table net_core_table[] = {
.extra1 = &zero, .extra1 = &zero,
.extra2 = &two, .extra2 = &two,
}, },
{
.procname = "high_order_alloc_disable",
.data = &net_high_order_alloc_disable_key.key,
.maxlen = sizeof(net_high_order_alloc_disable_key),
.mode = 0644,
.proc_handler = proc_do_static_key,
},
{ } { }
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment