Commit 88340160 authored by Martin KaFai Lau, committed by David S. Miller

ip_tunnel: Create percpu gro_cell

In the ipip tunnel, skb->queue_mapping is lost in ipip_rcv(), so every
skb is queued to the same cell->napi_skbs and gro_cell_poll ends up
pinned to one core under load.  In production traffic we also see severe
rx_dropped counts on the tunl interface, probably because this limit is
hit: skb_queue_len(&cell->napi_skbs) > netdev_max_backlog.

This patch allocates the cells with alloc_percpu(struct gro_cell) and
schedules gro_cell_poll to process the skb on the same core.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent f2bbca51
...@@ -8,25 +8,23 @@ ...@@ -8,25 +8,23 @@
struct gro_cell { struct gro_cell {
struct sk_buff_head napi_skbs; struct sk_buff_head napi_skbs;
struct napi_struct napi; struct napi_struct napi;
} ____cacheline_aligned_in_smp; };
struct gro_cells { struct gro_cells {
unsigned int gro_cells_mask; struct gro_cell __percpu *cells;
struct gro_cell *cells;
}; };
static inline void gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) static inline void gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb)
{ {
struct gro_cell *cell = gcells->cells; struct gro_cell *cell;
struct net_device *dev = skb->dev; struct net_device *dev = skb->dev;
if (!cell || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) { if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) {
netif_rx(skb); netif_rx(skb);
return; return;
} }
if (skb_rx_queue_recorded(skb)) cell = this_cpu_ptr(gcells->cells);
cell += skb_get_rx_queue(skb) & gcells->gro_cells_mask;
if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) {
atomic_long_inc(&dev->rx_dropped); atomic_long_inc(&dev->rx_dropped);
...@@ -72,15 +70,12 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de ...@@ -72,15 +70,12 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de
{ {
int i; int i;
gcells->gro_cells_mask = roundup_pow_of_two(netif_get_num_default_rss_queues()) - 1; gcells->cells = alloc_percpu(struct gro_cell);
gcells->cells = kcalloc(gcells->gro_cells_mask + 1,
sizeof(struct gro_cell),
GFP_KERNEL);
if (!gcells->cells) if (!gcells->cells)
return -ENOMEM; return -ENOMEM;
for (i = 0; i <= gcells->gro_cells_mask; i++) { for_each_possible_cpu(i) {
struct gro_cell *cell = gcells->cells + i; struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
skb_queue_head_init(&cell->napi_skbs); skb_queue_head_init(&cell->napi_skbs);
netif_napi_add(dev, &cell->napi, gro_cell_poll, 64); netif_napi_add(dev, &cell->napi, gro_cell_poll, 64);
...@@ -91,16 +86,16 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de ...@@ -91,16 +86,16 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de
static inline void gro_cells_destroy(struct gro_cells *gcells) static inline void gro_cells_destroy(struct gro_cells *gcells)
{ {
struct gro_cell *cell = gcells->cells;
int i; int i;
if (!cell) if (!gcells->cells)
return; return;
for (i = 0; i <= gcells->gro_cells_mask; i++,cell++) { for_each_possible_cpu(i) {
struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
netif_napi_del(&cell->napi); netif_napi_del(&cell->napi);
skb_queue_purge(&cell->napi_skbs); skb_queue_purge(&cell->napi_skbs);
} }
kfree(gcells->cells); free_percpu(gcells->cells);
gcells->cells = NULL; gcells->cells = NULL;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment