Commit 29b5e0f3 authored by Alexei Starovoitov

Merge branch 'AF_XDP-zerocopy-for-i40e'

Björn Töpel says:

====================
This patch set introduces zero-copy AF_XDP support for Intel's i40e
driver. In the first preparatory patch we also add support for
XDP_REDIRECT for zero-copy allocated frames so that XDP programs can
redirect them. This was a ToDo from the first AF_XDP zero-copy patch
set from early June. Special thanks to Alex Duyck and Jesper Dangaard
Brouer for reviewing earlier versions of this patch set.

The i40e zero-copy code is located in its own file i40e_xsk.[ch]. Note
that in the interest of time, to get an AF_XDP zero-copy implementation
out there for people to try, some code paths have been copied from the
XDP path to the zero-copy path. It is our goal to merge the two paths
in later patch sets.

In contrast to the implementation from beginning of June, this patch
set does not require any extra HW queues for AF_XDP zero-copy
TX. Instead, the XDP TX HW queue is used for both XDP_REDIRECT and
AF_XDP zero-copy TX.
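
As a rough illustration of this design, here is a minimal sketch, assuming
the structures added later in this set (vsi->xdp_rings, the ring's
q_vector->napi, and the new napi_if_scheduled_mark_missed() helper). It is
not the actual i40e_xsk.c code, only the general shape of an AF_XDP TX kick
that reuses the per-queue XDP TX ring:

/* Hedged sketch: wake the NAPI context that services the shared XDP TX
 * ring of a given queue so pending AF_XDP descriptors get transmitted.
 * Locking, state checks and error handling are omitted.
 */
static int xsk_async_xmit_sketch(struct i40e_vsi *vsi, u32 queue_id)
{
	struct i40e_ring *xdp_ring = vsi->xdp_rings[queue_id];

	/* If NAPI is already running it will notice the new work via
	 * NAPIF_STATE_MISSED; otherwise schedule it explicitly.
	 */
	if (!napi_if_scheduled_mark_missed(&xdp_ring->q_vector->napi))
		napi_schedule(&xdp_ring->q_vector->napi);

	return 0;
}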

Jeff, given that most of the changes are in i40e, it is up to you how you
would like to route these patches. The set is tagged bpf-next, but
if taking it via the Intel driver tree is easier, let us know.

We have run some benchmarks on a dual socket system with two Broadwell
E5 2660 @ 2.0 GHz with hyperthreading turned off. Each socket has 14
cores which gives a total of 28, but only two cores are used in these
experiments: one for TX/RX and one for the user space application. The
memory is DDR4 @ 2133 MT/s (1067 MHz) and the size of each DIMM is
8192MB and with 8 of those DIMMs in the system we have 64 GB of total
memory. The compiler used is gcc (Ubuntu 7.3.0-16ubuntu3) 7.3.0. The
NIC is Intel I40E 40Gbit/s using the i40e driver.

Below are the results in Mpps of the I40E NIC benchmark runs for 64
and 1500 byte packets, generated by a commercial packet generator HW
outputting packets at full 40 Gbit/s line rate. The results are with
retpoline and all other Spectre and Meltdown fixes enabled, so these results
are not comparable to the ones from the zero-copy patch set in June.

AF_XDP performance 64 byte packets:
Benchmark   XDP_SKB    XDP_DRV    XDP_DRV with zerocopy
rxdrop       2.6        8.2         15.0
txpush       2.2        -           21.9
l2fwd        1.7        2.3         11.3

AF_XDP performance 1500 byte packets:
Benchmark   XDP_SKB   XDP_DRV     XDP_DRV with zerocopy
rxdrop       2.0        3.3         3.3
l2fwd        1.3        1.7         3.1

XDP performance on our system as a base line:

64 byte packets:
XDP stats       CPU     pps         issue-pps
XDP-RX CPU      16      18.4M  0

1500 byte packets:
XDP stats       CPU     pps         issue-pps
XDP-RX CPU      16      3.3M    0

The structure of the patch set is as follows:

Patch 1: Add support for XDP_REDIRECT of zero-copy allocated frames
Patches 2-4: Preparatory patches to common xsk and net code
Patches 5-7: Preparatory patches to i40e driver code for RX
Patch 8: i40e zero-copy support for RX
Patch 9: Preparatory patch to i40e driver code for TX
Patch 10: i40e zero-copy support for TX
Patch 11: Add flags to sample application to force zero-copy/copy mode
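
Under the hood, forcing zero-copy or copy mode from user space amounts to
setting the corresponding bind flag on the AF_XDP socket. A minimal sketch
of what the new -z/-c sample options boil down to (hypothetical helper
name, error handling left to the caller):

#include <linux/if_xdp.h>
#include <string.h>
#include <sys/socket.h>

#ifndef AF_XDP
#define AF_XDP 44	/* fallback if the libc headers lack it */
#endif

/* Sketch: bind an already-created AF_XDP socket to a device queue.
 * 'flags' is XDP_ZEROCOPY or XDP_COPY; 0 lets the kernel choose.
 */
static int xsk_bind_sketch(int fd, __u32 ifindex, __u32 queue_id, __u16 flags)
{
	struct sockaddr_xdp sxdp;

	memset(&sxdp, 0, sizeof(sxdp));
	sxdp.sxdp_family = AF_XDP;
	sxdp.sxdp_ifindex = ifindex;
	sxdp.sxdp_queue_id = queue_id;
	sxdp.sxdp_flags = flags;

	return bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
}
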
====================
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 7d2c6cfc 58c50ae4
@@ -22,6 +22,7 @@ i40e-objs := i40e_main.o \
i40e_txrx.o \
i40e_ptp.o \
i40e_client.o \
i40e_virtchnl_pf.o \
i40e_xsk.o
i40e-$(CONFIG_I40E_DCB) += i40e_dcb.o i40e_dcb_nl.o
@@ -786,6 +786,11 @@ struct i40e_vsi {
/* VSI specific handlers */
irqreturn_t (*irq_handler)(int irq, void *data);
/* AF_XDP zero-copy */
struct xdp_umem **xsk_umems;
u16 num_xsk_umems_used;
u16 num_xsk_umems;
} ____cacheline_internodealigned_in_smp;
struct i40e_netdev_priv {
@@ -1090,6 +1095,20 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
return !!vsi->xdp_prog;
}
static inline struct xdp_umem *i40e_xsk_umem(struct i40e_ring *ring)
{
bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi);
int qid = ring->queue_index;
if (ring_is_xdp(ring))
qid -= ring->vsi->alloc_queue_pairs;
if (!ring->vsi->xsk_umems || !ring->vsi->xsk_umems[qid] || !xdp_on)
return NULL;
return ring->vsi->xsk_umems[qid];
}
int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate);
int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
......
@@ -9,7 +9,9 @@
/* Local includes */
#include "i40e.h"
#include "i40e_diag.h"
#include "i40e_xsk.h"
#include <net/udp_tunnel.h>
#include <net/xdp_sock.h>
/* All i40e tracepoints are defined by the include below, which
* must be included exactly once across the whole kernel with
* CREATE_TRACE_POINTS defined
@@ -3072,6 +3074,9 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
i40e_status err = 0;
u32 qtx_ctl = 0;
if (ring_is_xdp(ring))
ring->xsk_umem = i40e_xsk_umem(ring);
/* some ATR related tx ring init */
if (vsi->back->flags & I40E_FLAG_FD_ATR_ENABLED) {
ring->atr_sample_rate = vsi->back->atr_sample_rate;
@@ -3181,13 +3186,46 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
struct i40e_hw *hw = &vsi->back->hw;
struct i40e_hmc_obj_rxq rx_ctx;
i40e_status err = 0;
bool ok;
int ret;
bitmap_zero(ring->state, __I40E_RING_STATE_NBITS);
/* clear the context structure first */
memset(&rx_ctx, 0, sizeof(rx_ctx));
if (ring->vsi->type == I40E_VSI_MAIN)
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
ring->xsk_umem = i40e_xsk_umem(ring);
if (ring->xsk_umem) {
ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
XDP_PACKET_HEADROOM;
/* For AF_XDP ZC, we disallow packets to span on
* multiple buffers, thus letting us skip that
* handling in the fast-path.
*/
chain_len = 1;
ring->zca.free = i40e_zca_free;
ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_ZERO_COPY,
&ring->zca);
if (ret)
return ret;
dev_info(&vsi->back->pdev->dev,
"Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
ring->queue_index);
} else {
ring->rx_buf_len = vsi->rx_buf_len;
if (ring->vsi->type == I40E_VSI_MAIN) {
ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED,
NULL);
if (ret)
return ret;
}
}
rx_ctx.dbuff = DIV_ROUND_UP(ring->rx_buf_len,
BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
@@ -3243,7 +3281,15 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
writel(0, ring->tail);
ok = ring->xsk_umem ?
i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring)) :
!i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
if (!ok) {
dev_info(&vsi->back->pdev->dev,
"Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
ring->xsk_umem ? "UMEM enabled " : "",
ring->queue_index, pf_q);
}
return 0;
}
@@ -11827,6 +11873,256 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi,
return 0;
}
/**
* i40e_enter_busy_conf - Enters busy config state
* @vsi: vsi
*
* Returns 0 on success, <0 for failure.
**/
static int i40e_enter_busy_conf(struct i40e_vsi *vsi)
{
struct i40e_pf *pf = vsi->back;
int timeout = 50;
while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
timeout--;
if (!timeout)
return -EBUSY;
usleep_range(1000, 2000);
}
return 0;
}
/**
* i40e_exit_busy_conf - Exits busy config state
* @vsi: vsi
**/
static void i40e_exit_busy_conf(struct i40e_vsi *vsi)
{
struct i40e_pf *pf = vsi->back;
clear_bit(__I40E_CONFIG_BUSY, pf->state);
}
/**
* i40e_queue_pair_reset_stats - Resets all statistics for a queue pair
* @vsi: vsi
* @queue_pair: queue pair
**/
static void i40e_queue_pair_reset_stats(struct i40e_vsi *vsi, int queue_pair)
{
memset(&vsi->rx_rings[queue_pair]->rx_stats, 0,
sizeof(vsi->rx_rings[queue_pair]->rx_stats));
memset(&vsi->tx_rings[queue_pair]->stats, 0,
sizeof(vsi->tx_rings[queue_pair]->stats));
if (i40e_enabled_xdp_vsi(vsi)) {
memset(&vsi->xdp_rings[queue_pair]->stats, 0,
sizeof(vsi->xdp_rings[queue_pair]->stats));
}
}
/**
* i40e_queue_pair_clean_rings - Cleans all the rings of a queue pair
* @vsi: vsi
* @queue_pair: queue pair
**/
static void i40e_queue_pair_clean_rings(struct i40e_vsi *vsi, int queue_pair)
{
i40e_clean_tx_ring(vsi->tx_rings[queue_pair]);
if (i40e_enabled_xdp_vsi(vsi))
i40e_clean_tx_ring(vsi->xdp_rings[queue_pair]);
i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
}
/**
* i40e_queue_pair_toggle_napi - Enables/disables NAPI for a queue pair
* @vsi: vsi
* @queue_pair: queue pair
* @enable: true for enable, false for disable
**/
static void i40e_queue_pair_toggle_napi(struct i40e_vsi *vsi, int queue_pair,
bool enable)
{
struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
struct i40e_q_vector *q_vector = rxr->q_vector;
if (!vsi->netdev)
return;
/* All rings in a qp belong to the same qvector. */
if (q_vector->rx.ring || q_vector->tx.ring) {
if (enable)
napi_enable(&q_vector->napi);
else
napi_disable(&q_vector->napi);
}
}
/**
* i40e_queue_pair_toggle_rings - Enables/disables all rings for a queue pair
* @vsi: vsi
* @queue_pair: queue pair
* @enable: true for enable, false for disable
*
* Returns 0 on success, <0 on failure.
**/
static int i40e_queue_pair_toggle_rings(struct i40e_vsi *vsi, int queue_pair,
bool enable)
{
struct i40e_pf *pf = vsi->back;
int pf_q, ret = 0;
pf_q = vsi->base_queue + queue_pair;
ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q,
false /*is xdp*/, enable);
if (ret) {
dev_info(&pf->pdev->dev,
"VSI seid %d Tx ring %d %sable timeout\n",
vsi->seid, pf_q, (enable ? "en" : "dis"));
return ret;
}
i40e_control_rx_q(pf, pf_q, enable);
ret = i40e_pf_rxq_wait(pf, pf_q, enable);
if (ret) {
dev_info(&pf->pdev->dev,
"VSI seid %d Rx ring %d %sable timeout\n",
vsi->seid, pf_q, (enable ? "en" : "dis"));
return ret;
}
/* Due to HW errata, on Rx disable only, the register can
* indicate done before it really is. Needs 50ms to be sure
*/
if (!enable)
mdelay(50);
if (!i40e_enabled_xdp_vsi(vsi))
return ret;
ret = i40e_control_wait_tx_q(vsi->seid, pf,
pf_q + vsi->alloc_queue_pairs,
true /*is xdp*/, enable);
if (ret) {
dev_info(&pf->pdev->dev,
"VSI seid %d XDP Tx ring %d %sable timeout\n",
vsi->seid, pf_q, (enable ? "en" : "dis"));
}
return ret;
}
/**
* i40e_queue_pair_enable_irq - Enables interrupts for a queue pair
* @vsi: vsi
* @queue_pair: queue_pair
**/
static void i40e_queue_pair_enable_irq(struct i40e_vsi *vsi, int queue_pair)
{
struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
struct i40e_pf *pf = vsi->back;
struct i40e_hw *hw = &pf->hw;
/* All rings in a qp belong to the same qvector. */
if (pf->flags & I40E_FLAG_MSIX_ENABLED)
i40e_irq_dynamic_enable(vsi, rxr->q_vector->v_idx);
else
i40e_irq_dynamic_enable_icr0(pf);
i40e_flush(hw);
}
/**
* i40e_queue_pair_disable_irq - Disables interrupts for a queue pair
* @vsi: vsi
* @queue_pair: queue_pair
**/
static void i40e_queue_pair_disable_irq(struct i40e_vsi *vsi, int queue_pair)
{
struct i40e_ring *rxr = vsi->rx_rings[queue_pair];
struct i40e_pf *pf = vsi->back;
struct i40e_hw *hw = &pf->hw;
/* For simplicity, instead of removing the qp interrupt causes
* from the interrupt linked list, we simply disable the interrupt, and
* leave the list intact.
*
* All rings in a qp belong to the same qvector.
*/
if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
u32 intpf = vsi->base_vector + rxr->q_vector->v_idx;
wr32(hw, I40E_PFINT_DYN_CTLN(intpf - 1), 0);
i40e_flush(hw);
synchronize_irq(pf->msix_entries[intpf].vector);
} else {
/* Legacy and MSI mode - this stops all interrupt handling */
wr32(hw, I40E_PFINT_ICR0_ENA, 0);
wr32(hw, I40E_PFINT_DYN_CTL0, 0);
i40e_flush(hw);
synchronize_irq(pf->pdev->irq);
}
}
/**
* i40e_queue_pair_disable - Disables a queue pair
* @vsi: vsi
* @queue_pair: queue pair
*
* Returns 0 on success, <0 on failure.
**/
int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)
{
int err;
err = i40e_enter_busy_conf(vsi);
if (err)
return err;
i40e_queue_pair_disable_irq(vsi, queue_pair);
err = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */);
i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
i40e_queue_pair_clean_rings(vsi, queue_pair);
i40e_queue_pair_reset_stats(vsi, queue_pair);
return err;
}
/**
* i40e_queue_pair_enable - Enables a queue pair
* @vsi: vsi
* @queue_pair: queue pair
*
* Returns 0 on success, <0 on failure.
**/
int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair)
{
int err;
err = i40e_configure_tx_ring(vsi->tx_rings[queue_pair]);
if (err)
return err;
if (i40e_enabled_xdp_vsi(vsi)) {
err = i40e_configure_tx_ring(vsi->xdp_rings[queue_pair]);
if (err)
return err;
}
err = i40e_configure_rx_ring(vsi->rx_rings[queue_pair]);
if (err)
return err;
err = i40e_queue_pair_toggle_rings(vsi, queue_pair, true /* on */);
i40e_queue_pair_toggle_napi(vsi, queue_pair, true /* on */);
i40e_queue_pair_enable_irq(vsi, queue_pair);
i40e_exit_busy_conf(vsi);
return err;
}
/**
* i40e_xdp - implements ndo_bpf for i40e
* @dev: netdevice
@@ -11847,6 +12143,12 @@ static int i40e_xdp(struct net_device *dev,
case XDP_QUERY_PROG:
xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
return 0;
case XDP_QUERY_XSK_UMEM:
return i40e_xsk_umem_query(vsi, &xdp->xsk.umem,
xdp->xsk.queue_id);
case XDP_SETUP_XSK_UMEM:
return i40e_xsk_umem_setup(vsi, xdp->xsk.umem,
xdp->xsk.queue_id);
default:
return -EINVAL;
}
@@ -11886,6 +12188,7 @@ static const struct net_device_ops i40e_netdev_ops = {
.ndo_bridge_setlink = i40e_ndo_bridge_setlink,
.ndo_bpf = i40e_xdp,
.ndo_xdp_xmit = i40e_xdp_xmit,
.ndo_xsk_async_xmit = i40e_xsk_async_xmit,
};
/**
......
@@ -296,13 +296,17 @@ struct i40e_tx_buffer {
struct i40e_rx_buffer {
	dma_addr_t dma;
	union {
		struct {
			struct page *page;
			__u32 page_offset;
			__u16 pagecnt_bias;
		};
		struct {
			void *addr;
			u64 handle;
		};
	};
};
struct i40e_queue_stats {
@@ -414,6 +418,8 @@ struct i40e_ring {
struct i40e_channel *ch;
struct xdp_rxq_info xdp_rxq;
struct xdp_umem *xsk_umem;
struct zero_copy_allocator zca; /* ZC allocator anchor */
} ____cacheline_internodealigned_in_smp;
static inline bool ring_uses_build_skb(struct i40e_ring *ring)
......
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright(c) 2018 Intel Corporation. */
#ifndef I40E_TXRX_COMMON_
#define I40E_TXRX_COMMON_
void i40e_fd_handle_status(struct i40e_ring *rx_ring,
union i40e_rx_desc *rx_desc, u8 prog_id);
int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring);
struct i40e_rx_buffer *i40e_clean_programming_status(
struct i40e_ring *rx_ring,
union i40e_rx_desc *rx_desc,
u64 qw);
void i40e_process_skb_fields(struct i40e_ring *rx_ring,
union i40e_rx_desc *rx_desc, struct sk_buff *skb,
u8 rx_ptype);
void i40e_receive_skb(struct i40e_ring *rx_ring,
struct sk_buff *skb, u16 vlan_tag);
void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring);
void i40e_update_rx_stats(struct i40e_ring *rx_ring,
unsigned int total_rx_bytes,
unsigned int total_rx_packets);
void i40e_finalize_xdp_rx(struct i40e_ring *rx_ring, unsigned int xdp_res);
void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val);
#define I40E_XDP_PASS 0
#define I40E_XDP_CONSUMED BIT(0)
#define I40E_XDP_TX BIT(1)
#define I40E_XDP_REDIR BIT(2)
/**
* build_ctob - Builds the Tx descriptor (cmd, offset and type) qword
**/
static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
u32 td_tag)
{
return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
}
/**
* i40e_update_tx_stats - Update the egress statistics for the Tx ring
* @tx_ring: Tx ring to update
* @total_packets: total packets sent
* @total_bytes: total bytes sent
**/
static inline void i40e_update_tx_stats(struct i40e_ring *tx_ring,
unsigned int total_packets,
unsigned int total_bytes)
{
u64_stats_update_begin(&tx_ring->syncp);
tx_ring->stats.bytes += total_bytes;
tx_ring->stats.packets += total_packets;
u64_stats_update_end(&tx_ring->syncp);
tx_ring->q_vector->tx.total_bytes += total_bytes;
tx_ring->q_vector->tx.total_packets += total_packets;
}
#define WB_STRIDE 4
/**
* i40e_arm_wb - (Possibly) arms Tx write-back
* @tx_ring: Tx ring to update
* @vsi: the VSI
* @budget: the NAPI budget left
**/
static inline void i40e_arm_wb(struct i40e_ring *tx_ring,
struct i40e_vsi *vsi,
int budget)
{
if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
/* check to see if there are < 4 descriptors
* waiting to be written back, then kick the hardware to force
* them to be written back in case we stay in NAPI.
* In this mode on X722 we do not enable Interrupt.
*/
unsigned int j = i40e_get_tx_pending(tx_ring, false);
if (budget &&
((j / WB_STRIDE) == 0) && j > 0 &&
!test_bit(__I40E_VSI_DOWN, vsi->state) &&
(I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
tx_ring->arm_wb = true;
}
}
#endif /* I40E_TXRX_COMMON_ */
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright(c) 2018 Intel Corporation. */
#ifndef _I40E_XSK_H_
#define _I40E_XSK_H_
struct i40e_vsi;
struct xdp_umem;
struct zero_copy_allocator;
int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair);
int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair);
int i40e_xsk_umem_query(struct i40e_vsi *vsi, struct xdp_umem **umem,
u16 qid);
int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
u16 qid);
void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle);
bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);
int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi,
struct i40e_ring *tx_ring, int napi_budget);
int i40e_xsk_async_xmit(struct net_device *dev, u32 queue_id);
#endif /* _I40E_XSK_H_ */
@@ -535,6 +535,32 @@ static inline void napi_synchronize(const struct napi_struct *n)
barrier();
}
/**
* napi_if_scheduled_mark_missed - if napi is running, set the
* NAPIF_STATE_MISSED
* @n: NAPI context
*
* If napi is running, set the NAPIF_STATE_MISSED, and return true if
* NAPI is scheduled.
**/
static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n)
{
unsigned long val, new;
do {
val = READ_ONCE(n->state);
if (val & NAPIF_STATE_DISABLE)
return true;
if (!(val & NAPIF_STATE_SCHED))
return false;
new = val | NAPIF_STATE_MISSED;
} while (cmpxchg(&n->state, val, new) != val);
return true;
}
enum netdev_queue_state_t {
__QUEUE_STATE_DRV_XOFF,
__QUEUE_STATE_STACK_XOFF,
......
@@ -91,6 +91,8 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame)
frame->dev_rx = NULL;
}
struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
/* Convert xdp_buff to xdp_frame */
static inline
struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
@@ -99,9 +101,8 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
int metasize;
int headroom;
/* TODO: implement clone, copy, use "native" MEM_TYPE */
if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY)
return xdp_convert_zc_to_xdp_frame(xdp);
/* Assure headroom is available for storing info */
headroom = xdp->data - xdp->data_hard_start;
@@ -135,6 +136,7 @@ void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq);
bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
enum xdp_mem_type type, void *allocator);
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq);
/* Drivers not supporting XDP metadata can use this helper, which
* rejects any room expansion for metadata as a result.
......
@@ -79,6 +79,16 @@ void xsk_umem_discard_addr(struct xdp_umem *umem);
void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len);
void xsk_umem_consume_tx_done(struct xdp_umem *umem);
static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
{
return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
}
static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
{
return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
}
#else
static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
@@ -98,6 +108,39 @@ static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
return false;
}
static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
{
return NULL;
}
static inline void xsk_umem_discard_addr(struct xdp_umem *umem)
{
}
static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
{
}
static inline bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma,
u32 *len)
{
return false;
}
static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem)
{
}
static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
{
return NULL;
}
static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
{
return 0;
}
#endif /* CONFIG_XDP_SOCKETS */
#endif /* _LINUX_XDP_SOCK_H */
@@ -94,11 +94,21 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
kfree(xa);
}
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
struct xdp_mem_allocator *xa;
int id = xdp_rxq->mem.id;
if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
WARN(1, "Missing register, driver bug");
return;
}
if (xdp_rxq->mem.type != MEM_TYPE_PAGE_POOL &&
xdp_rxq->mem.type != MEM_TYPE_ZERO_COPY) {
return;
}
if (id == 0)
return;
@@ -110,6 +120,7 @@ static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
mutex_unlock(&mem_id_lock);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
{
@@ -119,7 +130,7 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");
xdp_rxq_info_unreg_mem_model(xdp_rxq);
xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
xdp_rxq->dev = NULL;
@@ -398,3 +409,42 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
info->flags = bpf->flags;
}
EXPORT_SYMBOL_GPL(xdp_attachment_setup);
struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
{
unsigned int metasize, headroom, totsize;
void *addr, *data_to_copy;
struct xdp_frame *xdpf;
struct page *page;
/* Clone into a MEM_TYPE_PAGE_ORDER0 xdp_frame. */
metasize = xdp_data_meta_unsupported(xdp) ? 0 :
xdp->data - xdp->data_meta;
headroom = xdp->data - xdp->data_hard_start;
totsize = xdp->data_end - xdp->data + metasize;
if (sizeof(*xdpf) + totsize > PAGE_SIZE)
return NULL;
page = dev_alloc_page();
if (!page)
return NULL;
addr = page_to_virt(page);
xdpf = addr;
memset(xdpf, 0, sizeof(*xdpf));
addr += sizeof(*xdpf);
data_to_copy = metasize ? xdp->data_meta : xdp->data;
memcpy(addr, data_to_copy, totsize);
xdpf->data = addr + metasize;
xdpf->len = totsize - metasize;
xdpf->headroom = 0;
xdpf->metasize = metasize;
xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
xdp_return_buff(xdp);
return xdpf;
}
EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);
@@ -8,16 +8,6 @@
#include <net/xdp_sock.h>
static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
{
return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1));
}
static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
{
return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1));
}
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
u32 queue_id, u16 flags);
bool xdp_umem_validate_queues(struct xdp_umem *umem);
......
@@ -649,6 +649,8 @@ static struct option long_options[] = {
{"xdp-skb", no_argument, 0, 'S'},
{"xdp-native", no_argument, 0, 'N'},
{"interval", required_argument, 0, 'n'},
{"zero-copy", no_argument, 0, 'z'},
{"copy", no_argument, 0, 'c'},
{0, 0, 0, 0}
};
@@ -667,6 +669,8 @@ static void usage(const char *prog)
" -S, --xdp-skb=n Use XDP skb-mod\n"
" -N, --xdp-native=n Enfore XDP native mode\n"
" -n, --interval=n Specify statistics update interval (default 1 sec).\n"
" -z, --zero-copy Force zero-copy mode.\n"
" -c, --copy Force copy mode.\n"
"\n"; "\n";
fprintf(stderr, str, prog); fprintf(stderr, str, prog);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
...@@ -679,7 +683,7 @@ static void parse_command_line(int argc, char **argv) ...@@ -679,7 +683,7 @@ static void parse_command_line(int argc, char **argv)
opterr = 0; opterr = 0;
for (;;) { for (;;) {
c = getopt_long(argc, argv, "rtli:q:psSNn:cz", long_options,
&option_index);
if (c == -1)
break;
@@ -716,6 +720,12 @@ static void parse_command_line(int argc, char **argv)
case 'n':
opt_interval = atoi(optarg);
break;
case 'z':
opt_xdp_bind_flags |= XDP_ZEROCOPY;
break;
case 'c':
opt_xdp_bind_flags |= XDP_COPY;
break;
default:
usage(basename(argv[0]));
}
......