Commit 4ddd6375 authored by Paolo Abeni

Merge branch 'net-rps-rfs-improvements'

Eric Dumazet says:

====================
net: rps/rfs improvements

Jason Xing attempted to optimize napi_schedule_rps() by avoiding
unneeded NET_RX_SOFTIRQ raises ([1], [2]).

This is quite complex to implement properly. I chose to implement
the idea myself, and added a similar optimization to ____napi_schedule().
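
The common core of both changes (shown in full in the hunks below) is a
new per-CPU flag, sd->in_net_rx_action, that net_rx_action() keeps set
while it runs: while the flag is set, code queueing new NAPI work can
skip raising NET_RX_SOFTIRQ, because net_rx_action() will notice the new
poll_list entry by itself. A condensed sketch of the pattern follows
(illustrative only, with a made-up helper name; the real code is in the
diff below):

    /* Condensed sketch; the real code is in the ____napi_schedule()
     * and napi_schedule_rps() hunks below.
     */
    static inline void queue_and_maybe_raise(struct softnet_data *sd,
                                             struct napi_struct *napi)
    {
            list_add_tail(&napi->poll_list, &sd->poll_list);
            /* If net_rx_action() is running on this cpu, it will see
             * the new entry, so raising NET_RX_SOFTIRQ is redundant.
             */
            if (!sd->in_net_rx_action)
                    __raise_softirq_irqoff(NET_RX_SOFTIRQ);
    }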

Overall, in an intensive RPC workload on a host with 32 TX/RX queues
and RFS enabled, I observed a ~10% reduction in NET_RX_SOFTIRQ
invocations.

While this had no impact on throughput or cpu cost in this synthetic
benchmark, we know that raising NET_RX_SOFTIRQ from a softirq handler
can force __do_softirq() to wake up ksoftirqd when need_resched() is true.
This can have a latency impact on stressed hosts.
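
To see why: __do_softirq() only restarts its processing loop while it
still has budget and need_resched() is false; otherwise any pending
softirq, including one we raised redundantly from softirq context, is
handed to ksoftirqd and must wait for the scheduler. Roughly (abridged
from kernel/softirq.c, not verbatim):

    pending = local_softirq_pending();
    if (pending) {
            if (time_before(jiffies, end) && !need_resched() &&
                --max_restart)
                    goto restart;

            /* Out of budget or preemption requested: defer the
             * remaining softirqs to ksoftirqd, adding wakeup and
             * scheduling latency.
             */
            wakeup_softirqd();
    }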

[1] https://lore.kernel.org/lkml/20230325152417.5403-1-kerneljasonxing@gmail.com/
[2] https://lore.kernel.org/netdev/20230328142112.12493-1-kerneljasonxing@gmail.com/
====================

Link: https://lore.kernel.org/r/20230328235021.1048163-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents 7079d5e6 8b43fd3d
@@ -3188,6 +3188,7 @@ struct softnet_data {
 #ifdef CONFIG_RPS
         struct softnet_data     *rps_ipi_list;
 #endif
+        bool                    in_net_rx_action;
 #ifdef CONFIG_NET_FLOW_LIMIT
         struct sd_flow_limit __rcu *flow_limit;
 #endif
@@ -4360,7 +4360,11 @@ static inline void ____napi_schedule(struct softnet_data *sd,
         }

         list_add_tail(&napi->poll_list, &sd->poll_list);
-        __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+        /* If not called from net_rx_action()
+         * we have to raise NET_RX_SOFTIRQ.
+         */
+        if (!sd->in_net_rx_action)
+                __raise_softirq_irqoff(NET_RX_SOFTIRQ);
 }

 #ifdef CONFIG_RPS
@@ -4582,11 +4586,16 @@ static void trigger_rx_softirq(void *data)
 }

 /*
- * Check if this softnet_data structure is another cpu one
- * If yes, queue it to our IPI list and return 1
- * If no, return 0
+ * After we queued a packet into sd->input_pkt_queue,
+ * we need to make sure this queue is serviced soon.
+ *
+ * - If this is another cpu queue, link it to our rps_ipi_list,
+ *   and make sure we will process rps_ipi_list from net_rx_action().
+ *
+ * - If this is our own queue, NAPI schedule our backlog.
+ *   Note that this also raises NET_RX_SOFTIRQ.
  */
-static int napi_schedule_rps(struct softnet_data *sd)
+static void napi_schedule_rps(struct softnet_data *sd)
 {
         struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
@@ -4595,12 +4604,15 @@ static int napi_schedule_rps(struct softnet_data *sd)
                 sd->rps_ipi_next = mysd->rps_ipi_list;
                 mysd->rps_ipi_list = sd;

-                __raise_softirq_irqoff(NET_RX_SOFTIRQ);
-                return 1;
+                /* If not called from net_rx_action()
+                 * we have to raise NET_RX_SOFTIRQ.
+                 */
+                if (!mysd->in_net_rx_action)
+                        __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+                return;
         }
 #endif /* CONFIG_RPS */
         __napi_schedule_irqoff(&mysd->backlog);
-        return 0;
 }

 #ifdef CONFIG_NET_FLOW_LIMIT
@@ -6640,6 +6652,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
         LIST_HEAD(list);
         LIST_HEAD(repoll);

+start:
+        sd->in_net_rx_action = true;
         local_irq_disable();
         list_splice_init(&sd->poll_list, &list);
         local_irq_enable();
@@ -6650,8 +6664,18 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
                 skb_defer_free_flush(sd);

                 if (list_empty(&list)) {
-                        if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
-                                goto end;
+                        if (list_empty(&repoll)) {
+                                sd->in_net_rx_action = false;
+                                barrier();
+                                /* We need to check if ____napi_schedule()
+                                 * had refilled poll_list while
+                                 * sd->in_net_rx_action was true.
+                                 */
+                                if (!list_empty(&sd->poll_list))
+                                        goto start;
+                                if (!sd_has_rps_ipi_waiting(sd))
+                                        goto end;
+                        }
                         break;
                 }
@@ -6676,6 +6700,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
         list_splice(&list, &sd->poll_list);
         if (!list_empty(&sd->poll_list))
                 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+        else
+                sd->in_net_rx_action = false;

         net_rps_action_and_irq_enable(sd);
 end:;