Commit 0a714186 authored by Björn Töpel, committed by Alexei Starovoitov

i40e: add AF_XDP zero-copy Rx support

This patch adds zero-copy Rx support for AF_XDP sockets. Instead of
allocating buffers of type MEM_TYPE_PAGE_SHARED, the Rx frames are
allocated as MEM_TYPE_ZERO_COPY when AF_XDP is enabled for a certain
queue.

All AF_XDP specific functions are added to a new file, i40e_xsk.c.

Note that when AF_XDP zero-copy is enabled, the XDP action XDP_PASS
will allocate a new buffer and copy the zero-copy frame prior to
passing it to the kernel stack.
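
The copy path itself lives in the new i40e_xsk.c (collapsed further below). A minimal, hedged sketch of what such a copy-on-XDP_PASS step typically looks like, with an illustrative function name rather than the driver's actual one:

    /* Hedged sketch only: the real implementation lives in the new
     * i40e_xsk.c. The function name is illustrative, not the driver's.
     */
    static struct sk_buff *copy_zc_frame_to_skb(struct i40e_ring *rx_ring,
                                                struct xdp_buff *xdp)
    {
            unsigned int datasize = xdp->data_end - xdp->data;
            struct sk_buff *skb;

            /* The umem frame cannot be handed to the stack directly, so a
             * fresh skb is allocated and the payload copied out of the
             * zero-copy buffer.
             */
            skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize,
                                   GFP_ATOMIC | __GFP_NOWARN);
            if (!skb)
                    return NULL;

            memcpy(__skb_put(skb, datasize), xdp->data, datasize);

            /* The original umem buffer is then recycled back to the fill
             * queue, independent of the skb's lifetime.
             */
            return skb;
    }
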
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 20a739db
drivers/net/ethernet/intel/i40e/Makefile
@@ -22,6 +22,7 @@ i40e-objs := i40e_main.o \
 	i40e_txrx.o	\
 	i40e_ptp.o	\
 	i40e_client.o	\
-	i40e_virtchnl_pf.o
+	i40e_virtchnl_pf.o \
+	i40e_xsk.o

 i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o
drivers/net/ethernet/intel/i40e/i40e.h
@@ -786,6 +786,11 @@ struct i40e_vsi {

 	/* VSI specific handlers */
 	irqreturn_t (*irq_handler)(int irq, void *data);
+
+	/* AF_XDP zero-copy */
+	struct xdp_umem **xsk_umems;
+	u16 num_xsk_umems_used;
+	u16 num_xsk_umems;
 } ____cacheline_internodealigned_in_smp;

 struct i40e_netdev_priv {
@@ -1090,6 +1095,20 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
 	return !!vsi->xdp_prog;
 }

+static inline struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring)
+{
+	bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi);
+	int qid = ring->queue_index;
+
+	if (ring_is_xdp(ring))
+		qid -= ring->vsi->alloc_queue_pairs;
+
+	if (!ring->vsi->xsk_umems || !ring->vsi->xsk_umems[qid] || !xdp_on)
+		return NULL;
+
+	return ring->vsi->xsk_umems[qid];
+}
+
 int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
 int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate);
 int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -9,7 +9,9 @@
 /* Local includes */
 #include "i40e.h"
 #include "i40e_diag.h"
+#include "i40e_xsk.h"
 #include <net/udp_tunnel.h>
+#include <net/xdp_sock.h>
 /* All i40e tracepoints are defined by the include below, which
  * must be included exactly once across the whole kernel with
  * CREATE_TRACE_POINTS defined
@@ -3181,13 +3183,46 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	struct i40e_hw *hw = &vsi->back->hw;
 	struct i40e_hmc_obj_rxq rx_ctx;
 	i40e_status err = 0;
+	bool ok;
+	int ret;

 	bitmap_zero(ring->state, __I40E_RING_STATE_NBITS);

 	/* clear the context structure first */
 	memset(&rx_ctx, 0, sizeof(rx_ctx));

-	ring->rx_buf_len = vsi->rx_buf_len;
+	if (ring->vsi->type == I40E_VSI_MAIN)
+		xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
+
+	ring->xsk_umem = i40e_xsk_umem(ring);
+	if (ring->xsk_umem) {
+		ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
+				   XDP_PACKET_HEADROOM;
+		/* For AF_XDP ZC, we disallow packets to span on
+		 * multiple buffers, thus letting us skip that
+		 * handling in the fast-path.
+		 */
+		chain_len = 1;
+		ring->zca.free = i40e_zca_free;
+		ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+						 MEM_TYPE_ZERO_COPY,
+						 &ring->zca);
+		if (ret)
+			return ret;
+		dev_info(&vsi->back->pdev->dev,
+			 "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
+			 ring->queue_index);
+	} else {
+		ring->rx_buf_len = vsi->rx_buf_len;
+
+		if (ring->vsi->type == I40E_VSI_MAIN) {
+			ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+							 MEM_TYPE_PAGE_SHARED,
+							 NULL);
+			if (ret)
+				return ret;
+		}
+	}

 	rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
 				    BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
@@ -3243,7 +3278,15 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
 	writel(0, ring->tail);

-	i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
+	ok = ring->xsk_umem ?
+	     i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring)) :
+	     !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
+	if (!ok) {
+		dev_info(&vsi->back->pdev->dev,
+			 "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
+			 ring->xsk_umem ? "UMEM enabled " : "",
+			 ring->queue_index, pf_q);
+	}

 	return 0;
 }
@@ -12097,6 +12140,12 @@ static int i40e_xdp(struct net_device *dev,
 	case XDP_QUERY_PROG:
 		xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
 		return 0;
+	case XDP_QUERY_XSK_UMEM:
+		return i40e_xsk_umem_query(vsi, &xdp->xsk.umem,
+					   xdp->xsk.queue_id);
+	case XDP_SETUP_XSK_UMEM:
+		return i40e_xsk_umem_setup(vsi, xdp->xsk.umem,
+					   xdp->xsk.queue_id);
 	default:
 		return -EINVAL;
 	}
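For context, the two new ndo_bpf cases above are reached when user space registers a umem and binds an AF_XDP socket to an Rx queue. A hedged user-space sketch of that sequence follows; the interface name, sizes and error handling are illustrative, the fill/completion rings are elided, and none of this is part of the commit itself:

    #include <linux/if_xdp.h>
    #include <net/if.h>
    #include <sys/mman.h>
    #include <sys/socket.h>
    #include <unistd.h>

    #ifndef AF_XDP
    #define AF_XDP 44
    #endif
    #ifndef SOL_XDP
    #define SOL_XDP 283
    #endif

    #define NUM_FRAMES 4096
    #define FRAME_SIZE 2048

    int main(void)
    {
            struct sockaddr_xdp sxdp = { 0 };
            struct xdp_umem_reg mr = { 0 };
            void *bufs;
            int fd;

            fd = socket(AF_XDP, SOCK_RAW, 0);
            if (fd < 0)
                    return 1;

            /* Register a chunk-aligned buffer area as the umem. */
            bufs = mmap(NULL, NUM_FRAMES * FRAME_SIZE, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (bufs == MAP_FAILED)
                    return 1;

            mr.addr = (unsigned long)bufs;
            mr.len = NUM_FRAMES * FRAME_SIZE;
            mr.chunk_size = FRAME_SIZE;
            mr.headroom = 0;
            setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));

            /* Fill, completion and Rx rings must also be configured and
             * mmap()ed before bind(); elided here.
             */

            /* Binding with XDP_ZEROCOPY is what ends up in the driver's
             * ndo_bpf handler with XDP_SETUP_XSK_UMEM for this queue.
             */
            sxdp.sxdp_family = AF_XDP;
            sxdp.sxdp_ifindex = if_nametoindex("eth0"); /* example name */
            sxdp.sxdp_queue_id = 0;
            sxdp.sxdp_flags = XDP_ZEROCOPY;
            bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp));

            close(fd);
            return 0;
    }
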
drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -9,6 +9,7 @@
 #include "i40e_trace.h"
 #include "i40e_prototype.h"
 #include "i40e_txrx_common.h"
+#include "i40e_xsk.h"

 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
 				u32 td_tag)
@@ -1380,6 +1381,9 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
 		rx_ring->skb = NULL;
 	}

+	if (rx_ring->xsk_umem)
+		goto skip_free;
+
 	/* Free all the Rx ring sk_buffs */
 	for (i = 0; i < rx_ring->count; i++) {
 		struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
@@ -1408,6 +1412,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
 		rx_bi->page_offset = 0;
 	}

+skip_free:
 	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
 	memset(rx_ring->rx_bi, 0, bi_size);
@@ -2641,7 +2646,9 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
 	budget_per_ring = max(budget/q_vector->num_ringpairs, 1);

 	i40e_for_each_ring(ring, q_vector->rx) {
-		int cleaned = i40e_clean_rx_irq(ring, budget_per_ring);
+		int cleaned = ring->xsk_umem ?
+			      i40e_clean_rx_irq_zc(ring, budget_per_ring) :
+			      i40e_clean_rx_irq(ring, budget_per_ring);

 		work_done += cleaned;
 		/* if we clean as many as budgeted, we must not be done */
drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -296,13 +296,17 @@ struct i40e_tx_buffer {

 struct i40e_rx_buffer {
 	dma_addr_t dma;
-	struct page *page;
-#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
-	__u32 page_offset;
-#else
-	__u16 page_offset;
-#endif
-	__u16 pagecnt_bias;
+	union {
+		struct {
+			struct page *page;
+			__u32 page_offset;
+			__u16 pagecnt_bias;
+		};
+		struct {
+			void *addr;
+			u64 handle;
+		};
+	};
 };

 struct i40e_queue_stats {
@@ -414,6 +418,8 @@ struct i40e_ring {

 	struct i40e_channel *ch;
 	struct xdp_rxq_info xdp_rxq;
+	struct xdp_umem *xsk_umem;
+	struct zero_copy_allocator zca; /* ZC allocator anchor */
 } ____cacheline_internodealigned_in_smp;

 static inline bool ring_uses_build_skb(struct i40e_ring *ring)
drivers/net/ethernet/intel/i40e/i40e_xsk.c (new file; diff collapsed in this view)
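The addr/handle pair added to struct i40e_rx_buffer above, together with the zca.free callback registered in i40e_configure_rx_ring(), is driven by the collapsed i40e_xsk.c. A hedged, kernel-context sketch of how such a buffer might be armed from the umem fill queue; the function name is illustrative and the umem helpers are assumed to be those exposed by net/xdp_sock.h in this series:

    /* Hedged sketch; the real code is in the collapsed i40e_xsk.c. */
    static bool i40e_alloc_buffer_zc_sketch(struct i40e_ring *rx_ring,
                                            struct i40e_rx_buffer *bi)
    {
            struct xdp_umem *umem = rx_ring->xsk_umem;
            u64 handle, hr;

            /* Pull the next free chunk address off the umem fill queue. */
            if (!xsk_umem_peek_addr(umem, &handle))
                    return false;

            hr = umem->headroom + XDP_PACKET_HEADROOM;

            /* addr/handle take the place of page/page_offset: both the DMA
             * address and the kernel virtual address point into the umem.
             */
            bi->dma = xdp_umem_get_dma(umem, handle) + hr;
            bi->addr = xdp_umem_get_data(umem, handle) + hr;
            bi->handle = handle + umem->headroom;

            xsk_umem_discard_addr(umem);
            return true;
    }
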
drivers/net/ethernet/intel/i40e/i40e_xsk.h (new file)

/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright(c) 2018 Intel Corporation. */

#ifndef _I40E_XSK_H_
#define _I40E_XSK_H_

struct i40e_vsi;
struct xdp_umem;
struct zero_copy_allocator;

int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair);
int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair);
int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem,
			u16 qid);
int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
			u16 qid);
void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle);
bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);
int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);

#endif /* _I40E_XSK_H_ */