Commit d75b1ade authored by Eric Dumazet, committed by David S. Miller

net: less interrupt masking in NAPI

net_rx_action() can mask irqs a single time to transfer sd->poll_list
into a private list, for a very short duration.
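
For illustration, a minimal sketch of this splice-once pattern (not the actual kernel code; rx_action_sketch is a made-up name and budget/requeue handling is omitted):

	static void rx_action_sketch(struct softnet_data *sd)
	{
		LIST_HEAD(list);	/* private list, visible only to this context */

		/* irqs are masked only while stealing the per-cpu poll_list */
		local_irq_disable();
		list_splice_init(&sd->poll_list, &list);
		local_irq_enable();

		/* drivers' ->poll() can now run with irqs enabled */
		while (!list_empty(&list)) {
			struct napi_struct *n;

			n = list_first_entry(&list, struct napi_struct, poll_list);
			list_del_init(&n->poll_list);
			n->poll(n, n->weight);
		}
	}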

Then, napi_complete() can avoid masking irqs again,
and net_rx_action() only needs to mask irqs again in the slow path.
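
The resulting fast path in napi_complete() looks like this (taken from the hunk below; it works because net_rx_action() now removes the napi from its private list with list_del_init() before calling ->poll(), so list_empty(&n->poll_list) is a cheap "still queued?" test):

	if (likely(list_empty(&n->poll_list))) {
		/* already off every poll list: just drop NAPI_STATE_SCHED */
		WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
	} else {
		/* still linked on sd->poll_list: fall back to masking irqs */
		local_irq_save(flags);
		__napi_complete(n);
		local_irq_restore(flags);
	}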

This patch removes two pairs of irq mask/unmask per typical NAPI run,
and more if multiple napi instances were triggered.

Note this also allows giving control back to the caller (do_softirq())
more often, so that other softirq handlers can be called a bit earlier,
or ksoftirqd can be woken up earlier under pressure.

This was developed while testing an alternative to RX interrupt
mitigation to reduce latencies while keeping or improving GRO
aggregation on fast NICs.

The idea is to test napi->gro_list at the end of a napi->poll() and
reschedule one NAPI poll, but only after servicing a full round of
softirqs (timers, TX, rcu, ...). This would be allowed only if the softirq
is currently serviced by the idle task or by ksoftirqd, and no resched is needed.
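
A purely hypothetical sketch of that idea (not part of this patch; served_from_idle_or_ksoftirqd() is an assumed helper, not a real kernel API):

	/* At the end of a poll: if GRO still holds packets and it is safe to
	 * defer, poll once more after the other softirqs instead of flushing.
	 */
	if (n->gro_list && served_from_idle_or_ksoftirqd() && !need_resched())
		napi_schedule(n);	/* polled again after a full softirq round */
	else
		napi_gro_flush(n, false);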
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 4cdb1e2e
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4316,20 +4316,28 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
 	local_irq_enable();
 }
 
+static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	return sd->rps_ipi_list != NULL;
+#else
+	return false;
+#endif
+}
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
 	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
 
-#ifdef CONFIG_RPS
 	/* Check if we have pending ipi, its better to send them now,
 	 * not waiting net_rx_action() end.
 	 */
-	if (sd->rps_ipi_list) {
+	if (sd_has_rps_ipi_waiting(sd)) {
 		local_irq_disable();
 		net_rps_action_and_irq_enable(sd);
 	}
-#endif
+
 	napi->weight = weight_p;
 	local_irq_disable();
 	while (1) {
@@ -4356,7 +4364,6 @@ static int process_backlog(struct napi_struct *napi, int quota)
 			 * We can use a plain write instead of clear_bit(),
 			 * and we dont need an smp_mb() memory barrier.
 			 */
-			list_del(&napi->poll_list);
 			napi->state = 0;
 			rps_unlock(sd);
 
@@ -4406,7 +4413,7 @@ void __napi_complete(struct napi_struct *n)
 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
 	BUG_ON(n->gro_list);
 
-	list_del(&n->poll_list);
+	list_del_init(&n->poll_list);
 	smp_mb__before_atomic();
 	clear_bit(NAPI_STATE_SCHED, &n->state);
 }
@@ -4424,9 +4431,15 @@ void napi_complete(struct napi_struct *n)
 		return;
 
 	napi_gro_flush(n, false);
-	local_irq_save(flags);
-	__napi_complete(n);
-	local_irq_restore(flags);
+
+	if (likely(list_empty(&n->poll_list))) {
+		WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
+	} else {
+		/* If n->poll_list is not empty, we need to mask irqs */
+		local_irq_save(flags);
+		__napi_complete(n);
+		local_irq_restore(flags);
+	}
 }
 EXPORT_SYMBOL(napi_complete);
 
@@ -4520,29 +4533,28 @@ static void net_rx_action(struct softirq_action *h)
 	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
 	unsigned long time_limit = jiffies + 2;
 	int budget = netdev_budget;
+	LIST_HEAD(list);
+	LIST_HEAD(repoll);
 	void *have;
 
 	local_irq_disable();
+	list_splice_init(&sd->poll_list, &list);
+	local_irq_enable();
 
-	while (!list_empty(&sd->poll_list)) {
+	while (!list_empty(&list)) {
 		struct napi_struct *n;
 		int work, weight;
 
-		/* If softirq window is exhuasted then punt.
+		/* If softirq window is exhausted then punt.
 		 * Allow this to run for 2 jiffies since which will allow
 		 * an average latency of 1.5/HZ.
 		 */
 		if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
 			goto softnet_break;
 
-		local_irq_enable();
-
-		/* Even though interrupts have been re-enabled, this
-		 * access is safe because interrupts can only add new
-		 * entries to the tail of this list, and only ->poll()
-		 * calls can remove this head entry from the list.
-		 */
-		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
+		n = list_first_entry(&list, struct napi_struct, poll_list);
+		list_del_init(&n->poll_list);
 
 		have = netpoll_poll_lock(n);
 
@@ -4564,8 +4576,6 @@ static void net_rx_action(struct softirq_action *h)
 
 		budget -= work;
 
-		local_irq_disable();
-
 		/* Drivers must not modify the NAPI state if they
 		 * consume the entire weight. In such cases this code
 		 * still "owns" the NAPI instance and therefore can
@@ -4573,32 +4583,40 @@ static void net_rx_action(struct softirq_action *h)
 		 */
 		if (unlikely(work == weight)) {
 			if (unlikely(napi_disable_pending(n))) {
-				local_irq_enable();
 				napi_complete(n);
-				local_irq_disable();
 			} else {
 				if (n->gro_list) {
 					/* flush too old packets
 					 * If HZ < 1000, flush all packets.
 					 */
-					local_irq_enable();
 					napi_gro_flush(n, HZ >= 1000);
-					local_irq_disable();
 				}
-				list_move_tail(&n->poll_list, &sd->poll_list);
+				list_add_tail(&n->poll_list, &repoll);
 			}
 		}
 
 		netpoll_poll_unlock(have);
 	}
+
+	if (!sd_has_rps_ipi_waiting(sd) &&
+	    list_empty(&list) &&
+	    list_empty(&repoll))
+		return;
 out:
+	local_irq_disable();
+
+	list_splice_tail_init(&sd->poll_list, &list);
+	list_splice_tail(&repoll, &list);
+	list_splice(&list, &sd->poll_list);
+	if (!list_empty(&sd->poll_list))
+		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+
 	net_rps_action_and_irq_enable(sd);
 
 	return;
 
 softnet_break:
 	sd->time_squeeze++;
-	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 	goto out;
 }