Commit a132af24 authored by Mitch Williams, committed by Jeff Kirsher

i40e/i40evf: Refactor the receive routines

Split the receive hot path code into two: one for packet split and one
for single buffer. This improves receive performance, since we only need
to check whether the ring is in packet-split mode once per NAPI poll,
not several times per packet. The single buffer code is further improved
by removing a fair amount of code and several variables that are no
longer needed. On a receive-oriented test this can improve single-threaded
throughput.
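As a sketch of what that split looks like in practice, the per-vector NAPI poll can decide once per ring which clean routine to run. The fragment below is illustrative only: ring_is_ps_enabled() and i40e_for_each_ring() appear in the header diff further down, but the _sketch function, the q_vector layout, and the i40e_clean_rx_irq_ps/_1buf names are assumptions standing in for the refactored receive routines.

/* Illustrative sketch: the packet-split check happens once per ring per
 * NAPI poll, and the rest of the work runs in a mode-specific routine.
 */
static int i40e_napi_poll_rx_sketch(struct napi_struct *napi, int budget)
{
        struct i40e_q_vector *q_vector =
                container_of(napi, struct i40e_q_vector, napi);
        struct i40e_ring *ring;
        int work_done = 0;

        i40e_for_each_ring(ring, q_vector->rx) {
                if (ring_is_ps_enabled(ring))
                        work_done += i40e_clean_rx_irq_ps(ring, budget);
                else
                        work_done += i40e_clean_rx_irq_1buf(ring, budget);
        }

        return work_done;
}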

Also refactor the packet split receive path to use a fixed buffer for
headers, like ixgbe does. This vastly reduces the number of DMA mappings
and unmappings we need to do, allowing for much better performance in
the presence of an IOMMU.
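The fixed header buffers correspond to the new hdr_buf member added to struct i40e_rx_buffer below, populated by the new i40e_alloc_rx_headers() helper. A minimal sketch of the idea, assuming one coherent allocation per ring carved into fixed-size per-descriptor header areas; the 256-byte size, the rx_bi array, and the dev pointer are assumptions here, not code from this change:

/* Sketch only: map one coherent region for all Rx header buffers on a
 * ring, instead of mapping and unmapping a header buffer per packet.
 */
void i40e_alloc_rx_headers_sketch(struct i40e_ring *rx_ring)
{
        int buf_size = 256;     /* assumed per-descriptor header size */
        dma_addr_t dma;
        void *buffer;
        int i;

        buffer = dma_alloc_coherent(rx_ring->dev, rx_ring->count * buf_size,
                                    &dma, GFP_KERNEL);
        if (!buffer)
                return;

        for (i = 0; i < rx_ring->count; i++) {
                struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];

                rx_bi->dma = dma + (i * buf_size);
                rx_bi->hdr_buf = buffer + (i * buf_size);
        }
}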

Lastly, correct packet split descriptor types now that we are actually
using them.

Change-ID: I3a194a93af3d2c31e77ff17644ac7376da6f3e4b
Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Tested-by: Jim Young <james.m.young@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
parent 694dc1cb
@@ -2591,7 +2591,12 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
 	writel(0, ring->tail);
 
-	i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
+	if (ring_is_ps_enabled(ring)) {
+		i40e_alloc_rx_headers(ring);
+		i40e_alloc_rx_buffers_ps(ring, I40E_DESC_UNUSED(ring));
+	} else {
+		i40e_alloc_rx_buffers_1buf(ring, I40E_DESC_UNUSED(ring));
+	}
 
 	return 0;
 }
@@ -7300,7 +7305,7 @@ static int i40e_sw_init(struct i40e_pf *pf)
 	pf->flags = I40E_FLAG_RX_CSUM_ENABLED |
 		    I40E_FLAG_MSI_ENABLED     |
 		    I40E_FLAG_MSIX_ENABLED    |
-		    I40E_FLAG_RX_1BUF_ENABLED;
+		    I40E_FLAG_RX_PS_ENABLED;
 
 	/* Set default ITR */
 	pf->rx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF;
This diff is collapsed.
@@ -96,6 +96,14 @@ enum i40e_dyn_idx_t {
 
 /* How many Rx Buffers do we bundle into one write to the hardware ? */
 #define I40E_RX_BUFFER_WRITE	16	/* Must be power of 2 */
+#define I40E_RX_INCREMENT(r, i) \
+	do {					\
+		(i)++;				\
+		if ((i) == (r)->count)		\
+			i = 0;			\
+		r->next_to_clean = i;		\
+	} while (0)
+
 #define I40E_RX_NEXT_DESC(r, i, n)		\
 	do {					\
 		(i)++;				\
@@ -151,6 +159,7 @@ struct i40e_tx_buffer {
 
 struct i40e_rx_buffer {
 	struct sk_buff *skb;
+	void *hdr_buf;
 	dma_addr_t dma;
 	struct page *page;
 	dma_addr_t page_dma;
@@ -223,8 +232,8 @@ struct i40e_ring {
 	u16 rx_buf_len;
 	u8  dtype;
 #define I40E_RX_DTYPE_NO_SPLIT      0
-#define I40E_RX_DTYPE_SPLIT_ALWAYS  1
-#define I40E_RX_DTYPE_HEADER_SPLIT  2
+#define I40E_RX_DTYPE_HEADER_SPLIT  1
+#define I40E_RX_DTYPE_SPLIT_ALWAYS  2
 	u8  hsplit;
 #define I40E_RX_SPLIT_L2    0x1
 #define I40E_RX_SPLIT_IP    0x2
@@ -280,7 +289,9 @@ struct i40e_ring_container {
 #define i40e_for_each_ring(pos, head) \
 	for (pos = (head).ring; pos != NULL; pos = pos->next)
 
-void i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
+void i40e_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count);
+void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count);
+void i40e_alloc_rx_headers(struct i40e_ring *rxr);
 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
 void i40e_clean_tx_ring(struct i40e_ring *tx_ring);
 void i40e_clean_rx_ring(struct i40e_ring *rx_ring);
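For context on the new I40E_RX_INCREMENT macro above: it advances a descriptor index, wraps it when it reaches the ring size, and publishes the result in next_to_clean. A hypothetical fragment from a cleanup-style loop might use it like this; the surrounding loop and the budget/cleaned counters are illustrative only, not code from this change.

        u16 i = rx_ring->next_to_clean;
        unsigned int cleaned = 0;

        while (cleaned < budget) {
                /* ... check the descriptor at index i; stop if the hardware
                 * has not written it back yet, otherwise hand its buffer up
                 * the stack ...
                 */
                cleaned++;
                I40E_RX_INCREMENT(rx_ring, i);
        }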
@@ -96,6 +96,14 @@ enum i40e_dyn_idx_t {
 
 /* How many Rx Buffers do we bundle into one write to the hardware ? */
 #define I40E_RX_BUFFER_WRITE	16	/* Must be power of 2 */
+#define I40E_RX_INCREMENT(r, i) \
+	do {					\
+		(i)++;				\
+		if ((i) == (r)->count)		\
+			i = 0;			\
+		r->next_to_clean = i;		\
+	} while (0)
+
 #define I40E_RX_NEXT_DESC(r, i, n)		\
 	do {					\
 		(i)++;				\
@@ -150,6 +158,7 @@ struct i40e_tx_buffer {
 
 struct i40e_rx_buffer {
 	struct sk_buff *skb;
+	void *hdr_buf;
 	dma_addr_t dma;
 	struct page *page;
 	dma_addr_t page_dma;
@@ -222,8 +231,8 @@ struct i40e_ring {
 	u16 rx_buf_len;
 	u8  dtype;
 #define I40E_RX_DTYPE_NO_SPLIT      0
-#define I40E_RX_DTYPE_SPLIT_ALWAYS  1
-#define I40E_RX_DTYPE_HEADER_SPLIT  2
+#define I40E_RX_DTYPE_HEADER_SPLIT  1
+#define I40E_RX_DTYPE_SPLIT_ALWAYS  2
 	u8  hsplit;
 #define I40E_RX_SPLIT_L2    0x1
 #define I40E_RX_SPLIT_IP    0x2
@@ -277,7 +286,9 @@ struct i40e_ring_container {
 #define i40e_for_each_ring(pos, head) \
 	for (pos = (head).ring; pos != NULL; pos = pos->next)
 
-void i40evf_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
+void i40evf_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count);
+void i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count);
+void i40evf_alloc_rx_headers(struct i40e_ring *rxr);
 netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
 void i40evf_clean_tx_ring(struct i40e_ring *tx_ring);
 void i40evf_clean_rx_ring(struct i40e_ring *rx_ring);
@@ -920,7 +920,7 @@ static void i40evf_configure(struct i40evf_adapter *adapter)
 	for (i = 0; i < adapter->num_active_queues; i++) {
 		struct i40e_ring *ring = adapter->rx_rings[i];
 
-		i40evf_alloc_rx_buffers(ring, ring->count);
+		i40evf_alloc_rx_buffers_1buf(ring, ring->count);
 		ring->next_to_use = ring->count - 1;
 		writel(ring->next_to_use, ring->tail);
 	}