Commit b4ded832 authored by Alexander Duyck, committed by Jeff Kirsher

ixgbe: Update adaptive ITR algorithm

The following change updates the adaptive ITR algorithm to better support
the needs of the network. Specifically, with this change the ITR algorithm
will try to prevent either starving a socket buffer for memory in the case
of Tx, or overrunning an Rx socket buffer on receive.

In addition a side effect of the calculations used is that we should
function better with new features such as XDP which can handle small
packets at high rates without needing to lock us into NAPI polling mode.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
parent c3aec05d
...@@ -435,8 +435,15 @@ static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) ...@@ -435,8 +435,15 @@ static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring)
} }
#define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring)) #define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring))
#define IXGBE_ITR_ADAPTIVE_MIN_INC 2
#define IXGBE_ITR_ADAPTIVE_MIN_USECS 10
#define IXGBE_ITR_ADAPTIVE_MAX_USECS 126
#define IXGBE_ITR_ADAPTIVE_LATENCY 0x80
#define IXGBE_ITR_ADAPTIVE_BULK 0x00
struct ixgbe_ring_container { struct ixgbe_ring_container {
struct ixgbe_ring *ring; /* pointer to linked list of rings */ struct ixgbe_ring *ring; /* pointer to linked list of rings */
unsigned long next_update; /* jiffies value of last update */
unsigned int total_bytes; /* total bytes processed this int */ unsigned int total_bytes; /* total bytes processed this int */
unsigned int total_packets; /* total packets processed this int */ unsigned int total_packets; /* total packets processed this int */
u16 work_limit; /* total work allowed per interrupt */ u16 work_limit; /* total work allowed per interrupt */
......
...@@ -806,6 +806,7 @@ static void ixgbe_add_ring(struct ixgbe_ring *ring, ...@@ -806,6 +806,7 @@ static void ixgbe_add_ring(struct ixgbe_ring *ring,
ring->next = head->ring; ring->next = head->ring;
head->ring = ring; head->ring = ring;
head->count++; head->count++;
head->next_update = jiffies + 1;
} }
/** /**
...@@ -879,8 +880,11 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, ...@@ -879,8 +880,11 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
/* initialize work limits */ /* initialize work limits */
q_vector->tx.work_limit = adapter->tx_work_limit; q_vector->tx.work_limit = adapter->tx_work_limit;
/* initialize pointer to rings */ /* Initialize setting for adaptive ITR */
ring = q_vector->ring; q_vector->tx.itr = IXGBE_ITR_ADAPTIVE_MAX_USECS |
IXGBE_ITR_ADAPTIVE_LATENCY;
q_vector->rx.itr = IXGBE_ITR_ADAPTIVE_MAX_USECS |
IXGBE_ITR_ADAPTIVE_LATENCY;
/* intialize ITR */ /* intialize ITR */
if (txr_count && !rxr_count) { if (txr_count && !rxr_count) {
...@@ -897,6 +901,9 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, ...@@ -897,6 +901,9 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
q_vector->itr = adapter->rx_itr_setting; q_vector->itr = adapter->rx_itr_setting;
} }
/* initialize pointer to rings */
ring = q_vector->ring;
while (txr_count) { while (txr_count) {
/* assign generic ring traits */ /* assign generic ring traits */
ring->dev = &adapter->pdev->dev; ring->dev = &adapter->pdev->dev;
......
...@@ -2540,50 +2540,174 @@ enum latency_range { ...@@ -2540,50 +2540,174 @@ enum latency_range {
static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector, static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector,
struct ixgbe_ring_container *ring_container) struct ixgbe_ring_container *ring_container)
{ {
int bytes = ring_container->total_bytes; unsigned int itr = IXGBE_ITR_ADAPTIVE_MIN_USECS |
int packets = ring_container->total_packets; IXGBE_ITR_ADAPTIVE_LATENCY;
u32 timepassed_us; unsigned int avg_wire_size, packets, bytes;
u64 bytes_perint; unsigned long next_update = jiffies;
u8 itr_setting = ring_container->itr;
if (packets == 0) /* If we don't have any rings just leave ourselves set for maximum
* possible latency so we take ourselves out of the equation.
*/
if (!ring_container->ring)
return; return;
/* simple throttlerate management /* If we didn't update within up to 1 - 2 jiffies we can assume
* 0-10MB/s lowest (100000 ints/s) * that either packets are coming in so slow there hasn't been
* 10-20MB/s low (20000 ints/s) * any work, or that there is so much work that NAPI is dealing
* 20-1249MB/s bulk (12000 ints/s) * with interrupt moderation and we don't need to do anything.
*/ */
/* what was last interrupt timeslice? */ if (time_after(next_update, ring_container->next_update))
timepassed_us = q_vector->itr >> 2; goto clear_counts;
if (timepassed_us == 0)
return;
bytes_perint = bytes / timepassed_us; /* bytes/usec */ packets = ring_container->total_packets;
switch (itr_setting) { /* We have no packets to actually measure against. This means
case lowest_latency: * either one of the other queues on this vector is active or
if (bytes_perint > 10) * we are a Tx queue doing TSO with too high of an interrupt rate.
itr_setting = low_latency; *
break; * When this occurs just tick up our delay by the minimum value
case low_latency: * and hope that this extra delay will prevent us from being called
if (bytes_perint > 20) * without any work on our queue.
itr_setting = bulk_latency; */
else if (bytes_perint <= 10) if (!packets) {
itr_setting = lowest_latency; itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
itr += ring_container->itr & IXGBE_ITR_ADAPTIVE_LATENCY;
goto clear_counts;
}
bytes = ring_container->total_bytes;
/* If packets are less than 4 or bytes are less than 9000 assume
* insufficient data to use bulk rate limiting approach. We are
* likely latency driven.
*/
if (packets < 4 && bytes < 9000) {
itr = IXGBE_ITR_ADAPTIVE_LATENCY;
goto adjust_by_size;
}
/* Between 4 and 48 we can assume that our current interrupt delay
* is only slightly too low. As such we should increase it by a small
* fixed amount.
*/
if (packets < 48) {
itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
goto clear_counts;
}
/* Between 48 and 96 is our "goldilocks" zone where we are working
* out "just right". Just report that our current ITR is good for us.
*/
if (packets < 96) {
itr = q_vector->itr >> 2;
goto clear_counts;
}
/* If packet count is 96 or greater we are likely looking at a slight
* overrun of the delay we want. Try halving our delay to see if that
* will cut the number of packets in half per interrupt.
*/
if (packets < 256) {
itr = q_vector->itr >> 3;
if (itr < IXGBE_ITR_ADAPTIVE_MIN_USECS)
itr = IXGBE_ITR_ADAPTIVE_MIN_USECS;
goto clear_counts;
}
/* The paths below assume we are dealing with a bulk ITR since number
* of packets is 256 or greater. We are just going to have to compute
* a value and try to bring the count under control, though for smaller
* packet sizes there isn't much we can do as NAPI polling will likely
* be kicking in sooner rather than later.
*/
itr = IXGBE_ITR_ADAPTIVE_BULK;
adjust_by_size:
/* If packet counts are 256 or greater we can assume we have a gross
* overestimation of what the rate should be. Instead of trying to fine
* tune it just use the formula below to try and dial in an exact value
* give the current packet size of the frame.
*/
avg_wire_size = bytes / packets;
/* The following is a crude approximation of:
* wmem_default / (size + overhead) = desired_pkts_per_int
* rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
* (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
*
* Assuming wmem_default is 212992 and overhead is 640 bytes per
* packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
* formula down to
*
* (170 * (size + 24)) / (size + 640) = ITR
*
* We first do some math on the packet size and then finally bitshift
* by 8 after rounding up. We also have to account for PCIe link speed
* difference as ITR scales based on this.
*/
if (avg_wire_size <= 60) {
/* Start at 50k ints/sec */
avg_wire_size = 5120;
} else if (avg_wire_size <= 316) {
/* 50K ints/sec to 16K ints/sec */
avg_wire_size *= 40;
avg_wire_size += 2720;
} else if (avg_wire_size <= 1084) {
/* 16K ints/sec to 9.2K ints/sec */
avg_wire_size *= 15;
avg_wire_size += 11452;
} else if (avg_wire_size <= 1980) {
/* 9.2K ints/sec to 8K ints/sec */
avg_wire_size *= 5;
avg_wire_size += 22420;
} else {
/* plateau at a limit of 8K ints/sec */
avg_wire_size = 32256;
}
/* If we are in low latency mode half our delay which doubles the rate
* to somewhere between 100K to 16K ints/sec
*/
if (itr & IXGBE_ITR_ADAPTIVE_LATENCY)
avg_wire_size >>= 1;
/* Resultant value is 256 times larger than it needs to be. This
* gives us room to adjust the value as needed to either increase
* or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
*
* Use addition as we have already recorded the new latency flag
* for the ITR value.
*/
switch (q_vector->adapter->link_speed) {
case IXGBE_LINK_SPEED_10GB_FULL:
case IXGBE_LINK_SPEED_100_FULL:
default:
itr += DIV_ROUND_UP(avg_wire_size,
IXGBE_ITR_ADAPTIVE_MIN_INC * 256) *
IXGBE_ITR_ADAPTIVE_MIN_INC;
break; break;
case bulk_latency: case IXGBE_LINK_SPEED_2_5GB_FULL:
if (bytes_perint <= 20) case IXGBE_LINK_SPEED_1GB_FULL:
itr_setting = low_latency; case IXGBE_LINK_SPEED_10_FULL:
itr += DIV_ROUND_UP(avg_wire_size,
IXGBE_ITR_ADAPTIVE_MIN_INC * 64) *
IXGBE_ITR_ADAPTIVE_MIN_INC;
break; break;
} }
/* clear work counters since we have the values we need */ clear_counts:
/* write back value */
ring_container->itr = itr;
/* next update should occur within next jiffy */
ring_container->next_update = next_update + 1;
ring_container->total_bytes = 0; ring_container->total_bytes = 0;
ring_container->total_packets = 0; ring_container->total_packets = 0;
/* write updated itr to ring container */
ring_container->itr = itr_setting;
} }
/** /**
...@@ -2625,34 +2749,19 @@ void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector) ...@@ -2625,34 +2749,19 @@ void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector)
static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector) static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector)
{ {
u32 new_itr = q_vector->itr; u32 new_itr;
u8 current_itr;
ixgbe_update_itr(q_vector, &q_vector->tx); ixgbe_update_itr(q_vector, &q_vector->tx);
ixgbe_update_itr(q_vector, &q_vector->rx); ixgbe_update_itr(q_vector, &q_vector->rx);
current_itr = max(q_vector->rx.itr, q_vector->tx.itr); /* use the smallest value of new ITR delay calculations */
new_itr = min(q_vector->rx.itr, q_vector->tx.itr);
switch (current_itr) { /* Clear latency flag if set, shift into correct position */
/* counts and packets in update_itr are dependent on these numbers */ new_itr &= ~IXGBE_ITR_ADAPTIVE_LATENCY;
case lowest_latency: new_itr <<= 2;
new_itr = IXGBE_100K_ITR;
break;
case low_latency:
new_itr = IXGBE_20K_ITR;
break;
case bulk_latency:
new_itr = IXGBE_12K_ITR;
break;
default:
break;
}
if (new_itr != q_vector->itr) { if (new_itr != q_vector->itr) {
/* do an exponential smoothing */
new_itr = (10 * new_itr * q_vector->itr) /
((9 * new_itr) + q_vector->itr);
/* save the algorithm value here */ /* save the algorithm value here */
q_vector->itr = new_itr; q_vector->itr = new_itr;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment