Commit a0073a4b authored by Alexander Duyck, committed by Jeff Kirsher

i40e/i40evf: Add support for new mechanism of updating adaptive ITR

This patch replaces the existing mechanism for determining the correct
value to program for adaptive ITR with yet another new and more
complicated approach.

The basic idea, from a 30,000-foot view, is that this new approach will push the
Rx interrupt moderation up so that by default it starts in low latency and
is gradually pushed up into a higher latency setup as long as doing so
increases the number of packets processed per interrupt. If the number of
packets processed drops to between 4 and 1 per interrupt, we reset and just
base our ITR on the size of the packets being received. For Tx we leave the
ITR floating at a high interrupt delay and do not pull it down unless we start
processing more than 112 packets per interrupt. If we start exceeding that, we
cut our interrupt delay in half until we are back below 112.
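
Reading that description as pseudocode, a minimal standalone sketch might look
like the following; the struct, function names, and the size-based fallback
constants are illustrative assumptions, not the driver's actual code (the real
implementation is in the collapsed diff below):

/* itr_sketch.c: a toy, standalone model of the heuristic above; all
 * names and fallback constants here are illustrative, not the driver's.
 */
#include <stdio.h>

struct itr_sketch {
	unsigned int itr_usecs;    /* current interrupt delay */
	unsigned int prev_packets; /* packets seen last interrupt */
};

/* Rx: start in low latency and keep adding delay while each increase
 * buys more packets per interrupt; once traffic collapses below 4
 * packets, fall back to sizing the delay off the packets themselves.
 */
static void rx_itr_update(struct itr_sketch *s, unsigned int packets,
			  unsigned int bytes)
{
	if (packets < 4) {
		unsigned int avg = packets ? bytes / packets : 0;

		s->itr_usecs = avg > 1200 ? 16 : 8; /* size-based reset */
	} else if (packets > s->prev_packets) {
		s->itr_usecs += 2; /* extra delay paid off; push it up */
	}
	s->prev_packets = packets;
}

/* Tx: float at a high delay; halve it whenever a single interrupt
 * completes more than 112 packets, repeating on later interrupts
 * until the batch size drops back below 112.
 */
static void tx_itr_update(struct itr_sketch *s, unsigned int packets)
{
	if (packets > 112 && s->itr_usecs > 1)
		s->itr_usecs /= 2;
}

int main(void)
{
	struct itr_sketch rx = { .itr_usecs = 8 };
	struct itr_sketch tx = { .itr_usecs = 84 };

	rx_itr_update(&rx, 32, 48000); /* busy Rx: delay creeps upward */
	tx_itr_update(&tx, 200);       /* overrun Tx: delay is halved */
	printf("rx itr = %u us, tx itr = %u us\n", rx.itr_usecs, tx.itr_usecs);
	return 0;
}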

The side effect of these patches is that we will be processing more
packets per interrupt. This is both a good and a bad thing: it means we
will not be blocking processing in the case of things like pktgen and XDP,
but we will also consume a bit more CPU in cases such as network
throughput tests using netperf.

One delta from the ixgbe version of these changes is that I have made the
interrupt moderation a bit more aggressive when we are in bulk mode, by
moving our "goldilocks zone" up from 48-96 packets per interrupt to 56-112.
The main motivation behind moving this is that we need to update less
frequently, and we have more fine-grained control due to the separate Tx
and Rx ITR times.
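
In other words, each interrupt's Tx completion count is compared against that
window; a minimal sketch of the check, with an illustrative function name and
the bounds taken from the text above:

/* Classify one interrupt's Tx batch against the 56-112 "goldilocks
 * zone" described above; the function name is illustrative.
 */
static unsigned int bulk_itr_adjust(unsigned int itr_usecs,
				    unsigned int packets)
{
	if (packets < 56)
		return itr_usecs + 2;	/* under the zone: add a little delay */
	if (packets <= 112)
		return itr_usecs;	/* in the zone: leave the ITR alone */
	return itr_usecs / 2;		/* over the zone: halve the delay */
}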
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
parent 556fdfd6
@@ -824,6 +824,7 @@ struct i40e_q_vector {
 	struct i40e_ring_container rx;
 	struct i40e_ring_container tx;
+	u8 itr_countdown;	/* when 0 should adjust adaptive ITR */
 	u8 num_ringpairs;	/* total number of ring pairs in vector */
 	cpumask_t affinity_mask;
@@ -832,8 +833,6 @@ struct i40e_q_vector {
 	struct rcu_head rcu;	/* to avoid race with update stats on free */
 	char name[I40E_INT_NAME_STR_LEN];
 	bool arm_wb_state;
-#define ITR_COUNTDOWN_START 100
-	u8 itr_countdown;	/* when 0 should adjust ITR */
 } ____cacheline_internodealigned_in_smp;

 /* lan device */
@@ -3449,19 +3449,20 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
 	for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
 		struct i40e_q_vector *q_vector = vsi->q_vectors[i];

-		q_vector->itr_countdown = ITR_COUNTDOWN_START;
+		q_vector->rx.next_update = jiffies + 1;
 		q_vector->rx.target_itr =
 			ITR_TO_REG(vsi->rx_rings[i]->itr_setting);
-		q_vector->rx.latency_range = I40E_LOW_LATENCY;
 		wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
 		     q_vector->rx.target_itr);
 		q_vector->rx.current_itr = q_vector->rx.target_itr;
+
+		q_vector->tx.next_update = jiffies + 1;
 		q_vector->tx.target_itr =
 			ITR_TO_REG(vsi->tx_rings[i]->itr_setting);
-		q_vector->tx.latency_range = I40E_LOW_LATENCY;
 		wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
 		     q_vector->tx.target_itr);
 		q_vector->tx.current_itr = q_vector->tx.target_itr;
+
 		wr32(hw, I40E_PFINT_RATEN(vector - 1),
 		     i40e_intrl_usec_to_reg(vsi->int_rate_limit));
@@ -3562,13 +3563,12 @@ static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
 	u32 val;

 	/* set the ITR configuration */
-	q_vector->itr_countdown = ITR_COUNTDOWN_START;
+	q_vector->rx.next_update = jiffies + 1;
 	q_vector->rx.target_itr = ITR_TO_REG(vsi->rx_rings[0]->itr_setting);
-	q_vector->rx.latency_range = I40E_LOW_LATENCY;
 	wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.target_itr);
 	q_vector->rx.current_itr = q_vector->rx.target_itr;
+	q_vector->tx.next_update = jiffies + 1;
 	q_vector->tx.target_itr = ITR_TO_REG(vsi->tx_rings[0]->itr_setting);
-	q_vector->tx.latency_range = I40E_LOW_LATENCY;
 	wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.target_itr);
 	q_vector->tx.current_itr = q_vector->tx.target_itr;
@@ -10345,9 +10345,6 @@ static int i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx, int cpu)
 	netif_napi_add(vsi->netdev, &q_vector->napi,
 		       i40e_napi_poll, NAPI_POLL_WEIGHT);

-	q_vector->rx.latency_range = I40E_LOW_LATENCY;
-	q_vector->tx.latency_range = I40E_LOW_LATENCY;
-
 	/* tie q_vector and vsi together */
 	vsi->q_vectors[v_idx] = q_vector;
[This diff is collapsed.]
@@ -463,20 +463,19 @@ static inline void set_ring_xdp(struct i40e_ring *ring)
 	ring->flags |= I40E_TXR_FLAGS_XDP;
 }

-enum i40e_latency_range {
-	I40E_LOWEST_LATENCY = 0,
-	I40E_LOW_LATENCY = 1,
-	I40E_BULK_LATENCY = 2,
-};
+#define I40E_ITR_ADAPTIVE_MIN_INC	0x0002
+#define I40E_ITR_ADAPTIVE_MIN_USECS	0x0002
+#define I40E_ITR_ADAPTIVE_MAX_USECS	0x007e
+#define I40E_ITR_ADAPTIVE_LATENCY	0x8000
+#define I40E_ITR_ADAPTIVE_BULK		0x0000
+#define ITR_IS_BULK(x)	(!((x) & I40E_ITR_ADAPTIVE_LATENCY))

 struct i40e_ring_container {
-	/* array of pointers to rings */
-	struct i40e_ring *ring;
+	struct i40e_ring *ring;		/* pointer to linked list of ring(s) */
+	unsigned long next_update;	/* jiffies value of next update */
 	unsigned int total_bytes;	/* total bytes processed this int */
 	unsigned int total_packets;	/* total packets processed this int */
-	unsigned long last_itr_update;	/* jiffies of last ITR update */
 	u16 count;
-	enum i40e_latency_range latency_range;
 	u16 target_itr;			/* target ITR setting for ring(s) */
 	u16 current_itr;		/* current ITR setting for ring(s) */
 };
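
For orientation: the replacement defines above pack both a microsecond delay
and a latency/bulk mode flag into a single 16-bit ITR value, with 0x8000 free
to act as the mode bit because hardware delay values stay well below it. A
small sketch of how such a value composes and decomposes (usage inferred from
the defines themselves; the consuming code is in the collapsed diff above):

#include <stdint.h>
#include <stdio.h>

#define I40E_ITR_ADAPTIVE_MIN_USECS	0x0002
#define I40E_ITR_ADAPTIVE_MAX_USECS	0x007e
#define I40E_ITR_ADAPTIVE_LATENCY	0x8000
#define ITR_IS_BULK(x)	(!((x) & I40E_ITR_ADAPTIVE_LATENCY))

int main(void)
{
	/* track a target ITR in latency mode at the minimum delay */
	uint16_t itr = I40E_ITR_ADAPTIVE_LATENCY | I40E_ITR_ADAPTIVE_MIN_USECS;

	/* the mode bit and the delay can be read back independently */
	printf("bulk=%d usecs=0x%04x\n", ITR_IS_BULK(itr),
	       itr & ~I40E_ITR_ADAPTIVE_LATENCY);

	/* the flag would be masked off before programming hardware */
	uint16_t reg = itr & ~I40E_ITR_ADAPTIVE_LATENCY;
	return reg > I40E_ITR_ADAPTIVE_MAX_USECS; /* 0 = within range */
}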
@@ -428,20 +428,19 @@ static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
 	ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
 }

-enum i40e_latency_range {
-	I40E_LOWEST_LATENCY = 0,
-	I40E_LOW_LATENCY = 1,
-	I40E_BULK_LATENCY = 2,
-};
+#define I40E_ITR_ADAPTIVE_MIN_INC	0x0002
+#define I40E_ITR_ADAPTIVE_MIN_USECS	0x0002
+#define I40E_ITR_ADAPTIVE_MAX_USECS	0x007e
+#define I40E_ITR_ADAPTIVE_LATENCY	0x8000
+#define I40E_ITR_ADAPTIVE_BULK		0x0000
+#define ITR_IS_BULK(x)	(!((x) & I40E_ITR_ADAPTIVE_LATENCY))

 struct i40e_ring_container {
-	/* array of pointers to rings */
-	struct i40e_ring *ring;
+	struct i40e_ring *ring;		/* pointer to linked list of ring(s) */
+	unsigned long next_update;	/* jiffies value of next update */
 	unsigned int total_bytes;	/* total bytes processed this int */
 	unsigned int total_packets;	/* total packets processed this int */
-	unsigned long last_itr_update;	/* jiffies of last ITR update */
 	u16 count;
-	enum i40e_latency_range latency_range;
 	u16 target_itr;			/* target ITR setting for ring(s) */
 	u16 current_itr;		/* current ITR setting for ring(s) */
 };
@@ -117,9 +117,8 @@ struct i40e_q_vector {
 	struct i40e_ring_container rx;
 	struct i40e_ring_container tx;
 	u32 ring_mask;
+	u8 itr_countdown;	/* when 0 should adjust adaptive ITR */
 	u8 num_ringpairs;	/* total number of ring pairs in vector */
-#define ITR_COUNTDOWN_START 100
-	u8 itr_countdown;	/* when 0 or 1 update ITR */
 	u16 v_idx;		/* index in the vsi->q_vector array. */
 	u16 reg_idx;		/* register index of the interrupt */
 	char name[IFNAMSIZ + 15];
@@ -353,10 +353,9 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx)
 	rx_ring->vsi = &adapter->vsi;
 	q_vector->rx.ring = rx_ring;
 	q_vector->rx.count++;
-	q_vector->rx.latency_range = I40E_LOW_LATENCY;
+	q_vector->rx.next_update = jiffies + 1;
 	q_vector->rx.target_itr = ITR_TO_REG(rx_ring->itr_setting);
 	q_vector->ring_mask |= BIT(r_idx);
-	q_vector->itr_countdown = ITR_COUNTDOWN_START;
 	wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, q_vector->reg_idx),
 	     q_vector->rx.current_itr);
 	q_vector->rx.current_itr = q_vector->rx.target_itr;
@@ -380,9 +379,8 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)
 	tx_ring->vsi = &adapter->vsi;
 	q_vector->tx.ring = tx_ring;
 	q_vector->tx.count++;
-	q_vector->tx.latency_range = I40E_LOW_LATENCY;
+	q_vector->tx.next_update = jiffies + 1;
 	q_vector->tx.target_itr = ITR_TO_REG(tx_ring->itr_setting);
-	q_vector->itr_countdown = ITR_COUNTDOWN_START;
 	q_vector->num_ringpairs++;
 	wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, q_vector->reg_idx),
 	     q_vector->tx.target_itr);