Commit 3134b9f0 authored by David S. Miller

Merge branch 'net-mitigate-kmem_free-slowpath'

Jesper Dangaard Brouer says:

====================
net: mitigating kmem_cache free slowpath

This patchset is the first real use-case for kmem_cache bulk _free_.
The use of bulk _alloc_ is NOT included in this patchset. The full
use-case has previously been posted here [1].

The bulk free side has the largest benefit for the network stack
use-case, because the network stack hits the kmem_cache/SLUB slowpath
when freeing SKBs, due to the number of outstanding SKBs. This is
solved by using the new kmem_cache_free_bulk() API.
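
For readers unfamiliar with the bulk API, the pattern looks roughly
like the sketch below. This is illustrative only, not code from the
patchset: SKB_FREE_BATCH, skb_free_cache, skb_free_count and
skb_free_one_deferred() are made-up names; only kmem_cache_free_bulk()
and skbuff_head_cache are existing kernel symbols, and the real patch
keeps the cache per-CPU.

/* Sketch: defer freed SKB heads into a small array and hand the whole
 * array back to the slab allocator in one call, instead of entering
 * the kmem_cache free slowpath once per object.
 */
#define SKB_FREE_BATCH	64

static void *skb_free_cache[SKB_FREE_BATCH];
static size_t skb_free_count;

static void skb_free_one_deferred(struct sk_buff *skb)
{
	/* real code must first drop skb->head and run destructors */
	skb_free_cache[skb_free_count++] = skb;

	if (skb_free_count == SKB_FREE_BATCH) {
		/* one bulk call frees the whole batch */
		kmem_cache_free_bulk(skbuff_head_cache, skb_free_count,
				     skb_free_cache);
		skb_free_count = 0;
	}
}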

Introduce a new API, napi_consume_skb(), which hides/handles the bulk
freeing for the caller. Drivers simply need to use this call when
freeing SKBs in NAPI context, e.g. replacing their calls to
dev_kfree_skb() / dev_consume_skb_any().
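
In a driver's NAPI TX-completion path the conversion is mechanical.
A rough sketch follows; the foo_* ring and buffer types are made-up
placeholders, not taken from any real driver:

/* Sketch: the NAPI budget is threaded down into the TX clean routine
 * so napi_consume_skb() can tell a normal NAPI poll (budget > 0, frees
 * may be deferred and bulked) from netpoll (budget == 0, must free
 * immediately with IRQs disabled).
 */
struct foo_tx_buffer {
	struct sk_buff *skb;
};

struct foo_ring {
	struct foo_tx_buffer *tx_buf;
	unsigned int next_to_clean;
};

static bool foo_clean_tx_irq(struct foo_ring *tx_ring, int napi_budget)
{
	struct foo_tx_buffer *tx_buf = &tx_ring->tx_buf[tx_ring->next_to_clean];

	/* was: dev_consume_skb_any(tx_buf->skb); */
	napi_consume_skb(tx_buf->skb, napi_budget);
	tx_buf->skb = NULL;

	/* unmap DMA, advance next_to_clean, etc. would follow here */
	return true;
}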

Driver ixgbe is the first user of this new API.

[1] http://thread.gmane.org/gmane.linux.network/384302/focus=397373
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 18ac5590 a3a8749d
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1089,7 +1089,7 @@ static void ixgbe_tx_timeout_reset(struct ixgbe_adapter *adapter)
  * @tx_ring: tx ring to clean
  **/
 static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
-			       struct ixgbe_ring *tx_ring)
+			       struct ixgbe_ring *tx_ring, int napi_budget)
 {
 	struct ixgbe_adapter *adapter = q_vector->adapter;
 	struct ixgbe_tx_buffer *tx_buffer;
@@ -1127,7 +1127,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
 		total_packets += tx_buffer->gso_segs;
 
 		/* free the skb */
-		dev_consume_skb_any(tx_buffer->skb);
+		napi_consume_skb(tx_buffer->skb, napi_budget);
 
 		/* unmap skb header data */
 		dma_unmap_single(tx_ring->dev,
@@ -2784,7 +2784,7 @@ int ixgbe_poll(struct napi_struct *napi, int budget)
 #endif
 
 	ixgbe_for_each_ring(ring, q_vector->tx)
-		clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring);
+		clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring, budget);
 
 	/* Exit if we are called by netpoll or busy polling is active */
 	if ((budget <= 0) || !ixgbe_qv_lock_napi(q_vector))
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2404,6 +2404,10 @@ static inline struct sk_buff *napi_alloc_skb(struct napi_struct *napi,
 {
 	return __napi_alloc_skb(napi, length, GFP_ATOMIC);
 }
 
+void napi_consume_skb(struct sk_buff *skb, int budget);
+void __kfree_skb_flush(void);
+void __kfree_skb_defer(struct sk_buff *skb);
+
 /**
  * __dev_alloc_pages - allocate page for network Rx
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3829,8 +3829,14 @@ static void net_tx_action(struct softirq_action *h)
 				trace_consume_skb(skb);
 			else
 				trace_kfree_skb(skb, net_tx_action);
-			__kfree_skb(skb);
+
+			if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
+				__kfree_skb(skb);
+			else
+				__kfree_skb_defer(skb);
 		}
+
+		__kfree_skb_flush();
 	}
 
 	if (sd->output_queue) {
@@ -5155,6 +5161,7 @@ static void net_rx_action(struct softirq_action *h)
 		}
 	}
 
+	__kfree_skb_flush();
 	local_irq_disable();
 
 	list_splice_tail_init(&sd->poll_list, &list);
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -347,8 +347,16 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 }
 EXPORT_SYMBOL(build_skb);
 
+#define NAPI_SKB_CACHE_SIZE	64
+
+struct napi_alloc_cache {
+	struct page_frag_cache page;
+	size_t skb_count;
+	void *skb_cache[NAPI_SKB_CACHE_SIZE];
+};
+
 static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache);
-static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache);
+static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
 
 static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
@@ -378,9 +386,9 @@ EXPORT_SYMBOL(netdev_alloc_frag);
 
 static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
 {
-	struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 
-	return __alloc_page_frag(nc, fragsz, gfp_mask);
+	return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
 }
 
 void *napi_alloc_frag(unsigned int fragsz)
@@ -474,7 +482,7 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
 struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 				 gfp_t gfp_mask)
 {
-	struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 	struct sk_buff *skb;
 	void *data;
@@ -494,7 +502,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 	if (sk_memalloc_socks())
 		gfp_mask |= __GFP_MEMALLOC;
 
-	data = __alloc_page_frag(nc, len, gfp_mask);
+	data = __alloc_page_frag(&nc->page, len, gfp_mask);
 	if (unlikely(!data))
 		return NULL;
@@ -505,7 +513,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
 	}
 
 	/* use OR instead of assignment to avoid clearing of bits in mask */
-	if (nc->pfmemalloc)
+	if (nc->page.pfmemalloc)
 		skb->pfmemalloc = 1;
 	skb->head_frag = 1;
@@ -747,6 +755,73 @@ void consume_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(consume_skb);
 
+void __kfree_skb_flush(void)
+{
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+	/* flush skb_cache if containing objects */
+	if (nc->skb_count) {
+		kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count,
+				     nc->skb_cache);
+		nc->skb_count = 0;
+	}
+}
+
+static inline void _kfree_skb_defer(struct sk_buff *skb)
+{
+	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+
+	/* drop skb->head and call any destructors for packet */
+	skb_release_all(skb);
+
+	/* record skb to CPU local list */
+	nc->skb_cache[nc->skb_count++] = skb;
+
+#ifdef CONFIG_SLUB
+	/* SLUB writes into objects when freeing */
+	prefetchw(skb);
+#endif
+
+	/* flush skb_cache if it is filled */
+	if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
+		kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE,
+				     nc->skb_cache);
+		nc->skb_count = 0;
+	}
+}
+void __kfree_skb_defer(struct sk_buff *skb)
+{
+	_kfree_skb_defer(skb);
+}
+
+void napi_consume_skb(struct sk_buff *skb, int budget)
+{
+	if (unlikely(!skb))
+		return;
+
+	/* if budget is 0 assume netpoll w/ IRQs disabled */
+	if (unlikely(!budget)) {
+		dev_consume_skb_irq(skb);
+		return;
+	}
+
+	if (likely(atomic_read(&skb->users) == 1))
+		smp_rmb();
+	else if (likely(!atomic_dec_and_test(&skb->users)))
+		return;
+	/* if reaching here SKB is ready to free */
+	trace_consume_skb(skb);
+
+	/* if SKB is a clone, don't handle this case */
+	if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) {
+		__kfree_skb(skb);
+		return;
+	}
+	_kfree_skb_defer(skb);
+}
+EXPORT_SYMBOL(napi_consume_skb);
+
 /* Make sure a field is enclosed inside headers_start/headers_end section */
 #define CHECK_SKB_FIELD(field) \
 	BUILD_BUG_ON(offsetof(struct sk_buff, field) < \