Commit 10d1b0e4 authored by Daniel Borkmann's avatar Daniel Borkmann

Merge branch 'xdp-ice-mbuf'

Maciej Fijalkowski says:

====================
Although this work started as an effort to add multi-buffer XDP support
to ice driver, as usual it turned out that some other side stuff needed
to be addressed, so let me give you an overview.

First patch adjusts legacy-rx in a way that it will be possible to refer
to skb_shared_info being at the end of the buffer when gathering up
frame fragments within xdp_buff.

Then, patches 2-9 prepare ice driver in a way that actual multi-buffer
patches will be easier to swallow.

10 and 11 are the meat. What is worth mentioning is that this set
actually *fixes* things as patch 11 removes the logic based on
next_dd/rs and we previously stepped away from this for ice_xmit_zc().
Currently, AF_XDP ZC XDP_TX workload is off as there are two cleaning
sides that can be triggered and two of them work on different internal
logic. This set unifies that and allows us to improve the performance by
2x with a trick on the last (13) patch.

12th is a simple cleanup of no longer fields from Tx ring.

I might be wrong but I have not seen anyone reporting performance impact
among patches that add XDP multi-buffer support to a particular driver.
Numbers below were gathered via xdp_rxq_info and xdp_redirect_map on
1500 MTU:

XDP_DROP      +1%
XDP_PASS      -1,2%
XDP_TX        -0,5%
XDP_REDIRECT  -3,3%

Cherry on top, which is not directly related to mbuf support (last
patch):

XDP_TX ZC +126%

Target the we agreed on was to not degrade performance for any action by
anything that would be over 5%, so our goal was met. Basically this set
keeps the performance where it was. Redirect is slower due to more
frequent tail bumps.
====================
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Reviewed-by: default avatarAlexander Lobakin <alexandr.lobakin@intel.com>
parents c1a3daf7 a24b4c6e
......@@ -355,9 +355,6 @@ static unsigned int ice_rx_offset(struct ice_rx_ring *rx_ring)
{
if (ice_ring_uses_build_skb(rx_ring))
return ICE_SKB_PAD;
else if (ice_is_xdp_ena_vsi(rx_ring->vsi))
return XDP_PACKET_HEADROOM;
return 0;
}
......@@ -495,7 +492,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
{
struct device *dev = ice_pf_to_dev(ring->vsi->back);
u16 num_bufs = ICE_DESC_UNUSED(ring);
u32 num_bufs = ICE_RX_DESC_UNUSED(ring);
int err;
ring->rx_buf_len = ring->vsi->rx_buf_len;
......@@ -503,8 +500,10 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
if (ring->vsi->type == ICE_VSI_PF) {
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
/* coverity[check_return] */
xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
ring->q_index, ring->q_vector->napi.napi_id);
__xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
ring->q_index,
ring->q_vector->napi.napi_id,
ring->vsi->rx_buf_len);
ring->xsk_pool = ice_xsk_pool(ring);
if (ring->xsk_pool) {
......@@ -524,9 +523,11 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
} else {
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
/* coverity[check_return] */
xdp_rxq_info_reg(&ring->xdp_rxq,
__xdp_rxq_info_reg(&ring->xdp_rxq,
ring->netdev,
ring->q_index, ring->q_vector->napi.napi_id);
ring->q_index,
ring->q_vector->napi.napi_id,
ring->vsi->rx_buf_len);
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED,
......@@ -536,6 +537,8 @@ int ice_vsi_cfg_rxq(struct ice_rx_ring *ring)
}
}
xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq);
ring->xdp.data = NULL;
err = ice_setup_rx_ctx(ring);
if (err) {
dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
......
......@@ -3046,8 +3046,6 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
/* clone ring and setup updated count */
xdp_rings[i] = *vsi->xdp_rings[i];
xdp_rings[i].count = new_tx_cnt;
xdp_rings[i].next_dd = ICE_RING_QUARTER(&xdp_rings[i]) - 1;
xdp_rings[i].next_rs = ICE_RING_QUARTER(&xdp_rings[i]) - 1;
xdp_rings[i].desc = NULL;
xdp_rings[i].tx_buf = NULL;
err = ice_setup_tx_ring(&xdp_rings[i]);
......@@ -3092,7 +3090,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
/* allocate Rx buffers */
err = ice_alloc_rx_bufs(&rx_rings[i],
ICE_DESC_UNUSED(&rx_rings[i]));
ICE_RX_DESC_UNUSED(&rx_rings[i]));
rx_unwind:
if (err) {
while (i) {
......
......@@ -1992,8 +1992,8 @@ void ice_update_eth_stats(struct ice_vsi *vsi)
void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
{
if (!vsi->netdev || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags)) {
vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
vsi->rx_buf_len = ICE_RXBUF_2048;
vsi->max_frame = ICE_MAX_FRAME_LEGACY_RX;
vsi->rx_buf_len = ICE_RXBUF_1664;
#if (PAGE_SIZE < 8192)
} else if (!ICE_2K_TOO_SMALL_WITH_PADDING &&
(vsi->netdev->mtu <= ETH_DATA_LEN)) {
......@@ -2002,11 +2002,7 @@ void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
#endif
} else {
vsi->max_frame = ICE_AQ_SET_MAC_FRAME_SIZE_MAX;
#if (PAGE_SIZE < 8192)
vsi->rx_buf_len = ICE_RXBUF_3072;
#else
vsi->rx_buf_len = ICE_RXBUF_2048;
#endif
}
}
......
......@@ -2570,8 +2570,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
xdp_ring->netdev = NULL;
xdp_ring->dev = dev;
xdp_ring->count = vsi->num_tx_desc;
xdp_ring->next_dd = ICE_RING_QUARTER(xdp_ring) - 1;
xdp_ring->next_rs = ICE_RING_QUARTER(xdp_ring) - 1;
WRITE_ONCE(vsi->xdp_rings[i], xdp_ring);
if (ice_setup_tx_ring(xdp_ring))
goto free_xdp_rings;
......@@ -2862,6 +2860,18 @@ int ice_vsi_determine_xdp_res(struct ice_vsi *vsi)
return 0;
}
/**
* ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
* @vsi: Pointer to VSI structure
*/
static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
{
if (test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
return ICE_RXBUF_1664;
else
return ICE_RXBUF_3072;
}
/**
* ice_xdp_setup_prog - Add or remove XDP eBPF program
* @vsi: VSI to setup XDP for
......@@ -2872,14 +2882,17 @@ static int
ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD;
bool if_running = netif_running(vsi->netdev);
int ret = 0, xdp_ring_err = 0;
if (frame_size > vsi->rx_buf_len) {
NL_SET_ERR_MSG_MOD(extack, "MTU too large for loading XDP");
if (prog && !prog->aux->xdp_has_frags) {
if (frame_size > ice_max_xdp_frame_size(vsi)) {
NL_SET_ERR_MSG_MOD(extack,
"MTU is too large for linear frames and XDP prog does not support frags");
return -EOPNOTSUPP;
}
}
/* need to stop netdev while setting up the program for Rx rings */
if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
......@@ -7330,18 +7343,6 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
dev_err(dev, "Rebuild failed, unload and reload driver\n");
}
/**
* ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
* @vsi: Pointer to VSI structure
*/
static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
{
if (PAGE_SIZE >= 8192 || test_bit(ICE_FLAG_LEGACY_RX, vsi->back->flags))
return ICE_RXBUF_2048 - XDP_PACKET_HEADROOM;
else
return ICE_RXBUF_3072;
}
/**
* ice_change_mtu - NDO callback to change the MTU
* @netdev: network interface device structure
......@@ -7354,6 +7355,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
struct ice_pf *pf = vsi->back;
struct bpf_prog *prog;
u8 count = 0;
int err = 0;
......@@ -7362,7 +7364,8 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
return 0;
}
if (ice_is_xdp_ena_vsi(vsi)) {
prog = vsi->xdp_prog;
if (prog && !prog->aux->xdp_has_frags) {
int frame_size = ice_max_xdp_frame_size(vsi);
if (new_mtu + ICE_ETH_PKT_HDR_PAD > frame_size) {
......@@ -7370,6 +7373,12 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
frame_size - ICE_ETH_PKT_HDR_PAD);
return -EINVAL;
}
} else if (test_bit(ICE_FLAG_LEGACY_RX, pf->flags)) {
if (new_mtu + ICE_ETH_PKT_HDR_PAD > ICE_MAX_FRAME_LEGACY_RX) {
netdev_err(netdev, "Too big MTU for legacy-rx; Max is %d\n",
ICE_MAX_FRAME_LEGACY_RX - ICE_ETH_PKT_HDR_PAD);
return -EINVAL;
}
}
/* if a reset is in progress, wait for some time for it to complete */
......
......@@ -113,12 +113,16 @@ static void
ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf)
{
if (tx_buf->skb) {
if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT)
if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT) {
devm_kfree(ring->dev, tx_buf->raw_buf);
else if (ice_ring_is_xdp(ring))
page_frag_free(tx_buf->raw_buf);
} else if (ice_ring_is_xdp(ring)) {
if (ring->xsk_pool)
xsk_buff_free(tx_buf->xdp);
else
page_frag_free(tx_buf->raw_buf);
} else {
dev_kfree_skb_any(tx_buf->skb);
}
if (dma_unmap_len(tx_buf, len))
dma_unmap_single(ring->dev,
dma_unmap_addr(tx_buf, dma),
......@@ -174,8 +178,6 @@ void ice_clean_tx_ring(struct ice_tx_ring *tx_ring)
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
tx_ring->next_dd = ICE_RING_QUARTER(tx_ring) - 1;
tx_ring->next_rs = ICE_RING_QUARTER(tx_ring) - 1;
if (!tx_ring->netdev)
return;
......@@ -382,6 +384,7 @@ int ice_setup_tx_ring(struct ice_tx_ring *tx_ring)
*/
void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
{
struct xdp_buff *xdp = &rx_ring->xdp;
struct device *dev = rx_ring->dev;
u32 size;
u16 i;
......@@ -390,16 +393,16 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
if (!rx_ring->rx_buf)
return;
if (rx_ring->skb) {
dev_kfree_skb(rx_ring->skb);
rx_ring->skb = NULL;
}
if (rx_ring->xsk_pool) {
ice_xsk_clean_rx_ring(rx_ring);
goto rx_skip_free;
}
if (xdp->data) {
xdp_return_buff(xdp);
xdp->data = NULL;
}
/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
......@@ -437,6 +440,7 @@ void ice_clean_rx_ring(struct ice_rx_ring *rx_ring)
rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->first_desc = 0;
rx_ring->next_to_use = 0;
}
......@@ -506,6 +510,7 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
rx_ring->next_to_use = 0;
rx_ring->next_to_clean = 0;
rx_ring->first_desc = 0;
if (ice_is_xdp_ena_vsi(rx_ring->vsi))
WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
......@@ -523,8 +528,16 @@ int ice_setup_rx_ring(struct ice_rx_ring *rx_ring)
return -ENOMEM;
}
/**
* ice_rx_frame_truesize
* @rx_ring: ptr to Rx ring
* @size: size
*
* calculate the truesize with taking into the account PAGE_SIZE of
* underlying arch
*/
static unsigned int
ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, unsigned int __maybe_unused size)
ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size)
{
unsigned int truesize;
......@@ -545,34 +558,39 @@ ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, unsigned int __maybe_unused s
* @xdp: xdp_buff used as input to the XDP program
* @xdp_prog: XDP program to run
* @xdp_ring: ring to be used for XDP_TX action
* @rx_buf: Rx buffer to store the XDP action
*
* Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR}
*/
static int
static void
ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring)
struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring,
struct ice_rx_buf *rx_buf)
{
int err;
unsigned int ret = ICE_XDP_PASS;
u32 act;
if (!xdp_prog)
goto exit;
act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
return ICE_XDP_PASS;
break;
case XDP_TX:
if (static_branch_unlikely(&ice_xdp_locking_key))
spin_lock(&xdp_ring->tx_lock);
err = ice_xmit_xdp_ring(xdp->data, xdp->data_end - xdp->data, xdp_ring);
ret = __ice_xmit_xdp_ring(xdp, xdp_ring);
if (static_branch_unlikely(&ice_xdp_locking_key))
spin_unlock(&xdp_ring->tx_lock);
if (err == ICE_XDP_CONSUMED)
if (ret == ICE_XDP_CONSUMED)
goto out_failure;
return err;
break;
case XDP_REDIRECT:
err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
if (err)
if (xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))
goto out_failure;
return ICE_XDP_REDIR;
ret = ICE_XDP_REDIR;
break;
default:
bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act);
fallthrough;
......@@ -581,8 +599,12 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
fallthrough;
case XDP_DROP:
return ICE_XDP_CONSUMED;
ret = ICE_XDP_CONSUMED;
}
exit:
rx_buf->act = ret;
if (unlikely(xdp_buff_has_frags(xdp)))
ice_set_rx_bufs_act(xdp, rx_ring, ret);
}
/**
......@@ -605,6 +627,7 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
unsigned int queue_index = smp_processor_id();
struct ice_vsi *vsi = np->vsi;
struct ice_tx_ring *xdp_ring;
struct ice_tx_buf *tx_buf;
int nxmit = 0, i;
if (test_bit(ICE_VSI_DOWN, vsi->state))
......@@ -627,16 +650,18 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
xdp_ring = vsi->xdp_rings[queue_index];
}
tx_buf = &xdp_ring->tx_buf[xdp_ring->next_to_use];
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
int err;
err = ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
err = ice_xmit_xdp_ring(xdpf, xdp_ring);
if (err != ICE_XDP_TX)
break;
nxmit++;
}
tx_buf->rs_idx = ice_set_rs_bit(xdp_ring);
if (unlikely(flags & XDP_XMIT_FLUSH))
ice_xdp_ring_update_tail(xdp_ring);
......@@ -706,7 +731,7 @@ ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi)
* buffers. Then bump tail at most one time. Grouping like this lets us avoid
* multiple tail writes per call.
*/
bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, u16 cleaned_count)
bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, unsigned int cleaned_count)
{
union ice_32b_rx_flex_desc *rx_desc;
u16 ntu = rx_ring->next_to_use;
......@@ -783,7 +808,6 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
/**
* ice_can_reuse_rx_page - Determine if page can be reused for another Rx
* @rx_buf: buffer containing the page
* @rx_buf_pgcnt: rx_buf page refcount pre xdp_do_redirect() call
*
* If page is reusable, we have a green light for calling ice_reuse_rx_page,
* which will assign the current buffer to the buffer that next_to_alloc is
......@@ -791,7 +815,7 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size)
* page freed
*/
static bool
ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt)
ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf)
{
unsigned int pagecnt_bias = rx_buf->pagecnt_bias;
struct page *page = rx_buf->page;
......@@ -802,7 +826,7 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt)
#if (PAGE_SIZE < 8192)
/* if we are only owner of page we can reuse it */
if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1))
if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1))
return false;
#else
#define ICE_LAST_OFFSET \
......@@ -824,33 +848,44 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt)
}
/**
* ice_add_rx_frag - Add contents of Rx buffer to sk_buff as a frag
* ice_add_xdp_frag - Add contents of Rx buffer to xdp buf as a frag
* @rx_ring: Rx descriptor ring to transact packets on
* @xdp: xdp buff to place the data into
* @rx_buf: buffer containing page to add
* @skb: sk_buff to place the data into
* @size: packet length from rx_desc
*
* This function will add the data contained in rx_buf->page to the skb.
* It will just attach the page as a frag to the skb.
* The function will then update the page offset.
* This function will add the data contained in rx_buf->page to the xdp buf.
* It will just attach the page as a frag.
*/
static void
ice_add_rx_frag(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
struct sk_buff *skb, unsigned int size)
static int
ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
struct ice_rx_buf *rx_buf, const unsigned int size)
{
#if (PAGE_SIZE >= 8192)
unsigned int truesize = SKB_DATA_ALIGN(size + rx_ring->rx_offset);
#else
unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
#endif
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
if (!size)
return;
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page,
rx_buf->page_offset, size, truesize);
return 0;
if (!xdp_buff_has_frags(xdp)) {
sinfo->nr_frags = 0;
sinfo->xdp_frags_size = 0;
xdp_buff_set_frags_flag(xdp);
}
if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) {
if (unlikely(xdp_buff_has_frags(xdp)))
ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED);
return -ENOMEM;
}
__skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page,
rx_buf->page_offset, size);
sinfo->xdp_frags_size += size;
/* page is being used so we must update the page offset */
ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
if (page_is_pfmemalloc(rx_buf->page))
xdp_buff_set_frag_pfmemalloc(xdp);
return 0;
}
/**
......@@ -886,19 +921,18 @@ ice_reuse_rx_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *old_buf)
* ice_get_rx_buf - Fetch Rx buffer and synchronize data for use
* @rx_ring: Rx descriptor ring to transact packets on
* @size: size of buffer to add to skb
* @rx_buf_pgcnt: rx_buf page refcount
*
* This function will pull an Rx buffer from the ring and synchronize it
* for use by the CPU.
*/
static struct ice_rx_buf *
ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
int *rx_buf_pgcnt)
const unsigned int ntc)
{
struct ice_rx_buf *rx_buf;
rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
*rx_buf_pgcnt =
rx_buf = &rx_ring->rx_buf[ntc];
rx_buf->pgcnt =
#if (PAGE_SIZE < 8192)
page_count(rx_buf->page);
#else
......@@ -922,26 +956,25 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size,
/**
* ice_build_skb - Build skb around an existing buffer
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: Rx buffer to pull data from
* @xdp: xdp_buff pointing to the data
*
* This function builds an skb around an existing Rx buffer, taking care
* to set up the skb correctly and avoid any memcpy overhead.
* This function builds an skb around an existing XDP buffer, taking care
* to set up the skb correctly and avoid any memcpy overhead. Driver has
* already combined frags (if any) to skb_shared_info.
*/
static struct sk_buff *
ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
struct xdp_buff *xdp)
ice_build_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
{
u8 metasize = xdp->data - xdp->data_meta;
#if (PAGE_SIZE < 8192)
unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
#else
unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
SKB_DATA_ALIGN(xdp->data_end -
xdp->data_hard_start);
#endif
struct skb_shared_info *sinfo = NULL;
unsigned int nr_frags;
struct sk_buff *skb;
if (unlikely(xdp_buff_has_frags(xdp))) {
sinfo = xdp_get_shared_info_from_buff(xdp);
nr_frags = sinfo->nr_frags;
}
/* Prefetch first cache line of first page. If xdp->data_meta
* is unused, this points exactly as xdp->data, otherwise we
* likely have a consumer accessing first few bytes of meta
......@@ -949,7 +982,7 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
*/
net_prefetch(xdp->data_meta);
/* build an skb around the page buffer */
skb = napi_build_skb(xdp->data_hard_start, truesize);
skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz);
if (unlikely(!skb))
return NULL;
......@@ -964,8 +997,11 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
if (metasize)
skb_metadata_set(skb, metasize);
/* buffer is used by skb, update page_offset */
ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
if (unlikely(xdp_buff_has_frags(xdp)))
xdp_update_skb_shared_info(skb, nr_frags,
sinfo->xdp_frags_size,
nr_frags * xdp->frame_sz,
xdp_buff_is_frag_pfmemalloc(xdp));
return skb;
}
......@@ -981,24 +1017,30 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
* skb correctly.
*/
static struct sk_buff *
ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
struct xdp_buff *xdp)
ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
{
unsigned int metasize = xdp->data - xdp->data_meta;
unsigned int size = xdp->data_end - xdp->data;
struct skb_shared_info *sinfo = NULL;
struct ice_rx_buf *rx_buf;
unsigned int nr_frags = 0;
unsigned int headlen;
struct sk_buff *skb;
/* prefetch first cache line of first page */
net_prefetch(xdp->data_meta);
net_prefetch(xdp->data);
if (unlikely(xdp_buff_has_frags(xdp))) {
sinfo = xdp_get_shared_info_from_buff(xdp);
nr_frags = sinfo->nr_frags;
}
/* allocate a skb to store the frags */
skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
ICE_RX_HDR_SIZE + metasize,
skb = __napi_alloc_skb(&rx_ring->q_vector->napi, ICE_RX_HDR_SIZE,
GFP_ATOMIC | __GFP_NOWARN);
if (unlikely(!skb))
return NULL;
rx_buf = &rx_ring->rx_buf[rx_ring->first_desc];
skb_record_rx_queue(skb, rx_ring->q_index);
/* Determine available headroom for copy */
headlen = size;
......@@ -1006,32 +1048,42 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
headlen = eth_get_headlen(skb->dev, xdp->data, ICE_RX_HDR_SIZE);
/* align pull length to size of long to optimize memcpy performance */
memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta,
ALIGN(headlen + metasize, sizeof(long)));
if (metasize) {
skb_metadata_set(skb, metasize);
__skb_pull(skb, metasize);
}
memcpy(__skb_put(skb, headlen), xdp->data, ALIGN(headlen,
sizeof(long)));
/* if we exhaust the linear part then add what is left as a frag */
size -= headlen;
if (size) {
#if (PAGE_SIZE >= 8192)
unsigned int truesize = SKB_DATA_ALIGN(size);
#else
unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
#endif
/* besides adding here a partial frag, we are going to add
* frags from xdp_buff, make sure there is enough space for
* them
*/
if (unlikely(nr_frags >= MAX_SKB_FRAGS - 1)) {
dev_kfree_skb(skb);
return NULL;
}
skb_add_rx_frag(skb, 0, rx_buf->page,
rx_buf->page_offset + headlen, size, truesize);
/* buffer is used by skb, update page_offset */
ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
rx_buf->page_offset + headlen, size,
xdp->frame_sz);
} else {
/* buffer is unused, reset bias back to rx_buf; data was copied
* onto skb's linear part so there's no need for adjusting
* page offset and we can reuse this buffer as-is
/* buffer is unused, change the act that should be taken later
* on; data was copied onto skb's linear part so there's no
* need for adjusting page offset and we can reuse this buffer
* as-is
*/
rx_buf->pagecnt_bias++;
rx_buf->act = ICE_SKB_CONSUMED;
}
if (unlikely(xdp_buff_has_frags(xdp))) {
struct skb_shared_info *skinfo = skb_shinfo(skb);
memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
sizeof(skb_frag_t) * nr_frags);
xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags,
sinfo->xdp_frags_size,
nr_frags * xdp->frame_sz,
xdp_buff_is_frag_pfmemalloc(xdp));
}
return skb;
......@@ -1041,26 +1093,17 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
* ice_put_rx_buf - Clean up used buffer and either recycle or free
* @rx_ring: Rx descriptor ring to transact packets on
* @rx_buf: Rx buffer to pull data from
* @rx_buf_pgcnt: Rx buffer page count pre xdp_do_redirect()
*
* This function will update next_to_clean and then clean up the contents
* of the rx_buf. It will either recycle the buffer or unmap it and free
* the associated resources.
* This function will clean up the contents of the rx_buf. It will either
* recycle the buffer or unmap it and free the associated resources.
*/
static void
ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
int rx_buf_pgcnt)
ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf)
{
u16 ntc = rx_ring->next_to_clean + 1;
/* fetch, update, and store next to clean */
ntc = (ntc < rx_ring->count) ? ntc : 0;
rx_ring->next_to_clean = ntc;
if (!rx_buf)
return;
if (ice_can_reuse_rx_page(rx_buf, rx_buf_pgcnt)) {
if (ice_can_reuse_rx_page(rx_buf)) {
/* hand second half of page back to the ring */
ice_reuse_rx_page(rx_ring, rx_buf);
} else {
......@@ -1075,27 +1118,6 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf,
rx_buf->page = NULL;
}
/**
* ice_is_non_eop - process handling of non-EOP buffers
* @rx_ring: Rx ring being processed
* @rx_desc: Rx descriptor for current buffer
*
* If the buffer is an EOP buffer, this function exits returning false,
* otherwise return true indicating that this is in fact a non-EOP buffer.
*/
static bool
ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc)
{
/* if we are the last buffer then there is nothing else to do */
#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF)))
return false;
rx_ring->ring_stats->rx_stats.non_eop_descs++;
return true;
}
/**
* ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: Rx descriptor ring to transact packets on
......@@ -1110,39 +1132,42 @@ ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc)
*/
int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_pkts = 0, frame_sz = 0;
u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
unsigned int offset = rx_ring->rx_offset;
struct xdp_buff *xdp = &rx_ring->xdp;
struct ice_tx_ring *xdp_ring = NULL;
unsigned int xdp_res, xdp_xmit = 0;
struct sk_buff *skb = rx_ring->skb;
struct bpf_prog *xdp_prog = NULL;
struct xdp_buff xdp;
u32 ntc = rx_ring->next_to_clean;
u32 cnt = rx_ring->count;
u32 cached_ntc = ntc;
u32 xdp_xmit = 0;
u32 cached_ntu;
bool failure;
u32 first;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
frame_sz = ice_rx_frame_truesize(rx_ring, 0);
xdp->frame_sz = ice_rx_frame_truesize(rx_ring, 0);
#endif
xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq);
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
if (xdp_prog)
if (xdp_prog) {
xdp_ring = rx_ring->xdp_ring;
cached_ntu = xdp_ring->next_to_use;
}
/* start the loop to process Rx packets bounded by 'budget' */
while (likely(total_rx_pkts < (unsigned int)budget)) {
union ice_32b_rx_flex_desc *rx_desc;
struct ice_rx_buf *rx_buf;
unsigned char *hard_start;
struct sk_buff *skb;
unsigned int size;
u16 stat_err_bits;
int rx_buf_pgcnt;
u16 vlan_tag = 0;
u16 rx_ptype;
/* get the Rx desc from Rx ring based on 'next_to_clean' */
rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
rx_desc = ICE_RX_DESC(rx_ring, ntc);
/* status_error_len will always be zero for unused descriptors
* because it's cleared in cleanup, and overlaps with hdr_addr
......@@ -1166,8 +1191,8 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
if (rx_desc->wb.rxdid == FDIR_DESC_RXDID &&
ctrl_vsi->vf)
ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc);
ice_put_rx_buf(rx_ring, NULL, 0);
cleaned_count++;
if (++ntc == cnt)
ntc = 0;
continue;
}
......@@ -1175,65 +1200,56 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
ICE_RX_FLX_DESC_PKT_LEN_M;
/* retrieve a buffer from the ring */
rx_buf = ice_get_rx_buf(rx_ring, size, &rx_buf_pgcnt);
rx_buf = ice_get_rx_buf(rx_ring, size, ntc);
if (!size) {
xdp.data = NULL;
xdp.data_end = NULL;
xdp.data_hard_start = NULL;
xdp.data_meta = NULL;
goto construct_skb;
}
if (!xdp->data) {
void *hard_start;
hard_start = page_address(rx_buf->page) + rx_buf->page_offset -
offset;
xdp_prepare_buff(&xdp, hard_start, offset, size, true);
xdp_prepare_buff(xdp, hard_start, offset, size, !!offset);
#if (PAGE_SIZE > 4096)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size);
xdp->frame_sz = ice_rx_frame_truesize(rx_ring, size);
#endif
xdp_buff_clear_frags_flag(xdp);
} else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) {
break;
}
if (++ntc == cnt)
ntc = 0;
if (!xdp_prog)
goto construct_skb;
/* skip if it is NOP desc */
if (ice_is_non_eop(rx_ring, rx_desc))
continue;
xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog, xdp_ring);
if (!xdp_res)
ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf);
if (rx_buf->act == ICE_XDP_PASS)
goto construct_skb;
if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
xdp_xmit |= xdp_res;
ice_rx_buf_adjust_pg_offset(rx_buf, xdp.frame_sz);
} else {
rx_buf->pagecnt_bias++;
}
total_rx_bytes += size;
total_rx_bytes += xdp_get_buff_len(xdp);
total_rx_pkts++;
cleaned_count++;
ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt);
xdp->data = NULL;
rx_ring->first_desc = ntc;
continue;
construct_skb:
if (skb) {
ice_add_rx_frag(rx_ring, rx_buf, skb, size);
} else if (likely(xdp.data)) {
if (ice_ring_uses_build_skb(rx_ring))
skb = ice_build_skb(rx_ring, rx_buf, &xdp);
if (likely(ice_ring_uses_build_skb(rx_ring)))
skb = ice_build_skb(rx_ring, xdp);
else
skb = ice_construct_skb(rx_ring, rx_buf, &xdp);
}
skb = ice_construct_skb(rx_ring, xdp);
/* exit if we failed to retrieve a buffer */
if (!skb) {
rx_ring->ring_stats->rx_stats.alloc_buf_failed++;
if (rx_buf)
rx_buf->pagecnt_bias++;
rx_ring->ring_stats->rx_stats.alloc_page_failed++;
rx_buf->act = ICE_XDP_CONSUMED;
if (unlikely(xdp_buff_has_frags(xdp)))
ice_set_rx_bufs_act(xdp, rx_ring,
ICE_XDP_CONSUMED);
xdp->data = NULL;
rx_ring->first_desc = ntc;
break;
}
ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt);
cleaned_count++;
/* skip if it is NOP desc */
if (ice_is_non_eop(rx_ring, rx_desc))
continue;
xdp->data = NULL;
rx_ring->first_desc = ntc;
stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
if (unlikely(ice_test_staterr(rx_desc->wb.status_error0,
......@@ -1245,10 +1261,8 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc);
/* pad the skb if needed, to make a valid ethernet frame */
if (eth_skb_pad(skb)) {
skb = NULL;
if (eth_skb_pad(skb))
continue;
}
/* probably a little skewed due to removing CRC */
total_rx_bytes += skb->len;
......@@ -1262,18 +1276,34 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
ice_trace(clean_rx_irq_indicate, rx_ring, rx_desc, skb);
/* send completed skb up the stack */
ice_receive_skb(rx_ring, skb, vlan_tag);
skb = NULL;
/* update budget accounting */
total_rx_pkts++;
}
first = rx_ring->first_desc;
while (cached_ntc != first) {
struct ice_rx_buf *buf = &rx_ring->rx_buf[cached_ntc];
if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) {
ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
xdp_xmit |= buf->act;
} else if (buf->act & ICE_XDP_CONSUMED) {
buf->pagecnt_bias++;
} else if (buf->act == ICE_XDP_PASS) {
ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz);
}
ice_put_rx_buf(rx_ring, buf);
if (++cached_ntc >= cnt)
cached_ntc = 0;
}
rx_ring->next_to_clean = ntc;
/* return up to cleaned_count buffers to hardware */
failure = ice_alloc_rx_bufs(rx_ring, cleaned_count);
failure = ice_alloc_rx_bufs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring));
if (xdp_prog)
ice_finalize_xdp_rx(xdp_ring, xdp_xmit);
rx_ring->skb = skb;
if (xdp_xmit)
ice_finalize_xdp_rx(xdp_ring, xdp_xmit, cached_ntu);
if (rx_ring->ring_stats)
ice_update_rx_ring_stats(rx_ring, total_rx_pkts,
......
......@@ -9,10 +9,12 @@
#define ICE_DFLT_IRQ_WORK 256
#define ICE_RXBUF_3072 3072
#define ICE_RXBUF_2048 2048
#define ICE_RXBUF_1664 1664
#define ICE_RXBUF_1536 1536
#define ICE_MAX_CHAINED_RX_BUFS 5
#define ICE_MAX_BUF_TXD 8
#define ICE_MIN_TX_LEN 17
#define ICE_MAX_FRAME_LEGACY_RX 8320
/* The size limit for a transmit buffer in a descriptor is (16K - 1).
* In order to align with the read requests we will align the value to
......@@ -110,6 +112,10 @@ static inline int ice_skb_pad(void)
(u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
(R)->next_to_clean - (R)->next_to_use - 1)
#define ICE_RX_DESC_UNUSED(R) \
((((R)->first_desc > (R)->next_to_use) ? 0 : (R)->count) + \
(R)->first_desc - (R)->next_to_use - 1)
#define ICE_RING_QUARTER(R) ((R)->count >> 2)
#define ICE_TX_FLAGS_TSO BIT(0)
......@@ -134,6 +140,7 @@ static inline int ice_skb_pad(void)
#define ICE_XDP_TX BIT(1)
#define ICE_XDP_REDIR BIT(2)
#define ICE_XDP_EXIT BIT(3)
#define ICE_SKB_CONSUMED ICE_XDP_CONSUMED
#define ICE_RX_DMA_ATTR \
(DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
......@@ -143,13 +150,20 @@ static inline int ice_skb_pad(void)
#define ICE_TXD_LAST_DESC_CMD (ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)
struct ice_tx_buf {
union {
struct ice_tx_desc *next_to_watch;
u32 rs_idx;
};
union {
struct sk_buff *skb;
void *raw_buf; /* used for XDP */
struct xdp_buff *xdp; /* used for XDP_TX ZC */
};
unsigned int bytecount;
unsigned short gso_segs;
union {
unsigned int gso_segs;
unsigned int nr_frags; /* used for mbuf XDP */
};
u32 tx_flags;
DEFINE_DMA_UNMAP_LEN(len);
DEFINE_DMA_UNMAP_ADDR(dma);
......@@ -170,7 +184,9 @@ struct ice_rx_buf {
dma_addr_t dma;
struct page *page;
unsigned int page_offset;
u16 pagecnt_bias;
unsigned int pgcnt;
unsigned int act;
unsigned int pagecnt_bias;
};
struct ice_q_stats {
......@@ -273,42 +289,44 @@ struct ice_rx_ring {
struct ice_vsi *vsi; /* Backreference to associated VSI */
struct ice_q_vector *q_vector; /* Backreference to associated vector */
u8 __iomem *tail;
u16 q_index; /* Queue number of ring */
u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
u16 next_to_alloc;
/* CL2 - 2nd cacheline starts here */
union {
struct ice_rx_buf *rx_buf;
struct xdp_buff **xdp_buf;
};
/* CL2 - 2nd cacheline starts here */
struct xdp_rxq_info xdp_rxq;
struct xdp_buff xdp;
/* CL3 - 3rd cacheline starts here */
u16 q_index; /* Queue number of ring */
u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
struct bpf_prog *xdp_prog;
u16 rx_offset;
/* used in interrupt processing */
u16 next_to_use;
u16 next_to_clean;
u16 next_to_alloc;
u16 rx_offset;
u16 rx_buf_len;
u16 first_desc;
/* stats structs */
struct ice_ring_stats *ring_stats;
struct rcu_head rcu; /* to avoid race on free */
/* CL4 - 3rd cacheline starts here */
/* CL4 - 4th cacheline starts here */
struct ice_channel *ch;
struct bpf_prog *xdp_prog;
struct ice_tx_ring *xdp_ring;
struct xsk_buff_pool *xsk_pool;
struct sk_buff *skb;
dma_addr_t dma; /* physical address of ring */
u64 cached_phctime;
u16 rx_buf_len;
u8 dcb_tc; /* Traffic class of ring */
u8 ptp_rx;
#define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1)
#define ICE_RX_FLAGS_CRC_STRIP_DIS BIT(2)
u8 flags;
/* CL5 - 5th cacheline starts here */
struct xdp_rxq_info xdp_rxq;
} ____cacheline_internodealigned_in_smp;
struct ice_tx_ring {
......@@ -326,12 +344,11 @@ struct ice_tx_ring {
struct xsk_buff_pool *xsk_pool;
u16 next_to_use;
u16 next_to_clean;
u16 next_rs;
u16 next_dd;
u16 q_handle; /* Queue handle per TC */
u16 reg_idx; /* HW register index of the ring */
u16 count; /* Number of descriptors */
u16 q_index; /* Queue number of ring */
u16 xdp_tx_active;
/* stats structs */
struct ice_ring_stats *ring_stats;
/* CL3 - 3rd cacheline starts here */
......@@ -342,7 +359,6 @@ struct ice_tx_ring {
spinlock_t tx_lock;
u32 txq_teid; /* Added Tx queue TEID */
/* CL4 - 4th cacheline starts here */
u16 xdp_tx_active;
#define ICE_TX_FLAGS_RING_XDP BIT(0)
#define ICE_TX_FLAGS_RING_VLAN_L2TAG1 BIT(1)
#define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2)
......@@ -431,7 +447,7 @@ static inline unsigned int ice_rx_pg_order(struct ice_rx_ring *ring)
union ice_32b_rx_flex_desc;
bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, u16 cleaned_count);
bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, unsigned int cleaned_count);
netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev);
u16
ice_select_queue(struct net_device *dev, struct sk_buff *skb,
......
......@@ -220,129 +220,194 @@ ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag)
napi_gro_receive(&rx_ring->q_vector->napi, skb);
}
/**
* ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
* @xdp_ring: XDP Tx ring
* @tx_buf: Tx buffer to clean
*/
static void
ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
{
dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
dma_unmap_len_set(tx_buf, len, 0);
xdp_ring->xdp_tx_active--;
page_frag_free(tx_buf->raw_buf);
tx_buf->raw_buf = NULL;
}
/**
* ice_clean_xdp_irq - Reclaim resources after transmit completes on XDP ring
* @xdp_ring: XDP ring to clean
*/
static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring)
static u32 ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring)
{
unsigned int total_bytes = 0, total_pkts = 0;
u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
u16 ntc = xdp_ring->next_to_clean;
struct ice_tx_desc *next_dd_desc;
u16 next_dd = xdp_ring->next_dd;
struct ice_tx_buf *tx_buf;
int i;
int total_bytes = 0, total_pkts = 0;
u32 ntc = xdp_ring->next_to_clean;
struct ice_tx_desc *tx_desc;
u32 cnt = xdp_ring->count;
u32 ready_frames = 0;
u32 frags;
u32 idx;
u32 ret;
idx = xdp_ring->tx_buf[ntc].rs_idx;
tx_desc = ICE_TX_DESC(xdp_ring, idx);
if (tx_desc->cmd_type_offset_bsz &
cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)) {
if (idx >= ntc)
ready_frames = idx - ntc + 1;
else
ready_frames = idx + cnt - ntc + 1;
}
next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd);
if (!(next_dd_desc->cmd_type_offset_bsz &
cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
return;
if (!ready_frames)
return 0;
ret = ready_frames;
for (i = 0; i < tx_thresh; i++) {
tx_buf = &xdp_ring->tx_buf[ntc];
while (ready_frames) {
struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];
/* bytecount holds size of head + frags */
total_bytes += tx_buf->bytecount;
/* normally tx_buf->gso_segs was taken but at this point
* it's always 1 for us
*/
frags = tx_buf->nr_frags;
total_pkts++;
/* count head + frags */
ready_frames -= frags + 1;
page_frag_free(tx_buf->raw_buf);
dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
dma_unmap_len_set(tx_buf, len, 0);
tx_buf->raw_buf = NULL;
if (xdp_ring->xsk_pool)
xsk_buff_free(tx_buf->xdp);
else
ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
ntc++;
if (ntc == cnt)
ntc = 0;
for (int i = 0; i < frags; i++) {
tx_buf = &xdp_ring->tx_buf[ntc];
ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
ntc++;
if (ntc >= xdp_ring->count)
if (ntc == cnt)
ntc = 0;
}
}
next_dd_desc->cmd_type_offset_bsz = 0;
xdp_ring->next_dd = xdp_ring->next_dd + tx_thresh;
if (xdp_ring->next_dd > xdp_ring->count)
xdp_ring->next_dd = tx_thresh - 1;
tx_desc->cmd_type_offset_bsz = 0;
xdp_ring->next_to_clean = ntc;
ice_update_tx_ring_stats(xdp_ring, total_pkts, total_bytes);
return ret;
}
/**
* ice_xmit_xdp_ring - submit single packet to XDP ring for transmission
* @data: packet data pointer
* @size: packet data size
* __ice_xmit_xdp_ring - submit frame to XDP ring for transmission
* @xdp: XDP buffer to be placed onto Tx descriptors
* @xdp_ring: XDP ring for transmission
*/
int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring)
int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring)
{
u16 tx_thresh = ICE_RING_QUARTER(xdp_ring);
u16 i = xdp_ring->next_to_use;
struct skb_shared_info *sinfo = NULL;
u32 size = xdp->data_end - xdp->data;
struct device *dev = xdp_ring->dev;
u32 ntu = xdp_ring->next_to_use;
struct ice_tx_desc *tx_desc;
struct ice_tx_buf *tx_head;
struct ice_tx_buf *tx_buf;
dma_addr_t dma;
u32 cnt = xdp_ring->count;
void *data = xdp->data;
u32 nr_frags = 0;
u32 free_space;
u32 frag = 0;
free_space = ICE_DESC_UNUSED(xdp_ring);
if (ICE_DESC_UNUSED(xdp_ring) < tx_thresh)
ice_clean_xdp_irq(xdp_ring);
if (ICE_DESC_UNUSED(xdp_ring) < ICE_RING_QUARTER(xdp_ring))
free_space += ice_clean_xdp_irq(xdp_ring);
if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) {
if (unlikely(xdp_buff_has_frags(xdp))) {
sinfo = xdp_get_shared_info_from_buff(xdp);
nr_frags = sinfo->nr_frags;
if (free_space < nr_frags + 1) {
xdp_ring->ring_stats->tx_stats.tx_busy++;
return ICE_XDP_CONSUMED;
}
}
dma = dma_map_single(xdp_ring->dev, data, size, DMA_TO_DEVICE);
if (dma_mapping_error(xdp_ring->dev, dma))
return ICE_XDP_CONSUMED;
tx_desc = ICE_TX_DESC(xdp_ring, ntu);
tx_head = &xdp_ring->tx_buf[ntu];
tx_buf = tx_head;
tx_buf = &xdp_ring->tx_buf[i];
tx_buf->bytecount = size;
tx_buf->gso_segs = 1;
tx_buf->raw_buf = data;
for (;;) {
dma_addr_t dma;
dma = dma_map_single(dev, data, size, DMA_TO_DEVICE);
if (dma_mapping_error(dev, dma))
goto dma_unmap;
/* record length, and DMA address */
dma_unmap_len_set(tx_buf, len, size);
dma_unmap_addr_set(tx_buf, dma, dma);
tx_desc = ICE_TX_DESC(xdp_ring, i);
tx_desc->buf_addr = cpu_to_le64(dma);
tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 0,
size, 0);
tx_desc->cmd_type_offset_bsz = ice_build_ctob(0, 0, size, 0);
tx_buf->raw_buf = data;
xdp_ring->xdp_tx_active++;
i++;
if (i == xdp_ring->count) {
i = 0;
tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
tx_desc->cmd_type_offset_bsz |=
cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
xdp_ring->next_rs = tx_thresh - 1;
ntu++;
if (ntu == cnt)
ntu = 0;
if (frag == nr_frags)
break;
tx_desc = ICE_TX_DESC(xdp_ring, ntu);
tx_buf = &xdp_ring->tx_buf[ntu];
data = skb_frag_address(&sinfo->frags[frag]);
size = skb_frag_size(&sinfo->frags[frag]);
frag++;
}
xdp_ring->next_to_use = i;
if (i > xdp_ring->next_rs) {
tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs);
/* store info about bytecount and frag count in first desc */
tx_head->bytecount = xdp_get_buff_len(xdp);
tx_head->nr_frags = nr_frags;
/* update last descriptor from a frame with EOP */
tx_desc->cmd_type_offset_bsz |=
cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
xdp_ring->next_rs += tx_thresh;
}
cpu_to_le64(ICE_TX_DESC_CMD_EOP << ICE_TXD_QW1_CMD_S);
xdp_ring->xdp_tx_active++;
xdp_ring->next_to_use = ntu;
return ICE_XDP_TX;
dma_unmap:
for (;;) {
tx_buf = &xdp_ring->tx_buf[ntu];
dma_unmap_page(dev, dma_unmap_addr(tx_buf, dma),
dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
dma_unmap_len_set(tx_buf, len, 0);
if (tx_buf == tx_head)
break;
if (!ntu)
ntu += cnt;
ntu--;
}
return ICE_XDP_CONSUMED;
}
/**
* ice_xmit_xdp_buff - convert an XDP buffer to an XDP frame and send it
* @xdp: XDP buffer
* @xdp_ring: XDP Tx ring
*
* Returns negative on failure, 0 on success.
* ice_xmit_xdp_ring - submit frame to XDP ring for transmission
* @xdpf: XDP frame that will be converted to XDP buff
* @xdp_ring: XDP ring for transmission
*/
int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring)
int ice_xmit_xdp_ring(struct xdp_frame *xdpf, struct ice_tx_ring *xdp_ring)
{
struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
struct xdp_buff xdp;
if (unlikely(!xdpf))
return ICE_XDP_CONSUMED;
return ice_xmit_xdp_ring(xdpf->data, xdpf->len, xdp_ring);
xdp_convert_frame_to_buff(xdpf, &xdp);
return __ice_xmit_xdp_ring(&xdp, xdp_ring);
}
/**
......@@ -354,14 +419,21 @@ int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring)
* should be called when a batch of packets has been processed in the
* napi loop.
*/
void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res)
void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res,
u32 first_idx)
{
struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[first_idx];
if (xdp_res & ICE_XDP_REDIR)
xdp_do_flush_map();
if (xdp_res & ICE_XDP_TX) {
if (static_branch_unlikely(&ice_xdp_locking_key))
spin_lock(&xdp_ring->tx_lock);
/* store index of descriptor with RS bit set in the first
* ice_tx_buf of given NAPI batch
*/
tx_buf->rs_idx = ice_set_rs_bit(xdp_ring);
ice_xdp_ring_update_tail(xdp_ring);
if (static_branch_unlikely(&ice_xdp_locking_key))
spin_unlock(&xdp_ring->tx_lock);
......
......@@ -5,6 +5,36 @@
#define _ICE_TXRX_LIB_H_
#include "ice.h"
/**
* ice_set_rx_bufs_act - propagate Rx buffer action to frags
* @xdp: XDP buffer representing frame (linear and frags part)
* @rx_ring: Rx ring struct
* act: action to store onto Rx buffers related to XDP buffer parts
*
* Set action that should be taken before putting Rx buffer from first frag
* to one before last. Last one is handled by caller of this function as it
* is the EOP frag that is currently being processed. This function is
* supposed to be called only when XDP buffer contains frags.
*/
static inline void
ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring,
const unsigned int act)
{
const struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
u32 first = rx_ring->first_desc;
u32 nr_frags = sinfo->nr_frags;
u32 cnt = rx_ring->count;
struct ice_rx_buf *buf;
for (int i = 0; i < nr_frags; i++) {
buf = &rx_ring->rx_buf[first];
buf->act = act;
if (++first == cnt)
first = 0;
}
}
/**
* ice_test_staterr - tests bits in Rx descriptor status and error fields
* @status_err_n: Rx descriptor status_error0 or status_error1 bits
......@@ -21,6 +51,28 @@ ice_test_staterr(__le16 status_err_n, const u16 stat_err_bits)
return !!(status_err_n & cpu_to_le16(stat_err_bits));
}
/**
* ice_is_non_eop - process handling of non-EOP buffers
* @rx_ring: Rx ring being processed
* @rx_desc: Rx descriptor for current buffer
*
* If the buffer is an EOP buffer, this function exits returning false,
* otherwise return true indicating that this is in fact a non-EOP buffer.
*/
static inline bool
ice_is_non_eop(const struct ice_rx_ring *rx_ring,
const union ice_32b_rx_flex_desc *rx_desc)
{
/* if we are the last buffer then there is nothing else to do */
#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF)))
return false;
rx_ring->ring_stats->rx_stats.non_eop_descs++;
return true;
}
static inline __le64
ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
{
......@@ -70,9 +122,28 @@ static inline void ice_xdp_ring_update_tail(struct ice_tx_ring *xdp_ring)
writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
}
void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res);
/**
* ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU)
* @xdp_ring: XDP ring to produce the HW Tx descriptors on
*
* returns index of descriptor that had RS bit produced on
*/
static inline u32 ice_set_rs_bit(const struct ice_tx_ring *xdp_ring)
{
u32 rs_idx = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1;
struct ice_tx_desc *tx_desc;
tx_desc = ICE_TX_DESC(xdp_ring, rs_idx);
tx_desc->cmd_type_offset_bsz |=
cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
return rs_idx;
}
void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res, u32 first_idx);
int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring);
int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring);
int ice_xmit_xdp_ring(struct xdp_frame *xdpf, struct ice_tx_ring *xdp_ring);
int __ice_xmit_xdp_ring(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring);
void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val);
void
ice_process_skb_fields(struct ice_rx_ring *rx_ring,
......
......@@ -597,6 +597,107 @@ ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
return skb;
}
/**
* ice_clean_xdp_irq_zc - AF_XDP ZC specific Tx cleaning routine
* @xdp_ring: XDP Tx ring
*/
static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
{
u16 ntc = xdp_ring->next_to_clean;
struct ice_tx_desc *tx_desc;
u16 cnt = xdp_ring->count;
struct ice_tx_buf *tx_buf;
u16 xsk_frames = 0;
u16 last_rs;
int i;
last_rs = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : cnt - 1;
tx_desc = ICE_TX_DESC(xdp_ring, last_rs);
if (tx_desc->cmd_type_offset_bsz &
cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)) {
if (last_rs >= ntc)
xsk_frames = last_rs - ntc + 1;
else
xsk_frames = last_rs + cnt - ntc + 1;
}
if (!xsk_frames)
return;
if (likely(!xdp_ring->xdp_tx_active))
goto skip;
ntc = xdp_ring->next_to_clean;
for (i = 0; i < xsk_frames; i++) {
tx_buf = &xdp_ring->tx_buf[ntc];
if (tx_buf->xdp) {
xsk_buff_free(tx_buf->xdp);
xdp_ring->xdp_tx_active--;
} else {
xsk_frames++;
}
ntc++;
if (ntc == cnt)
ntc = 0;
}
skip:
tx_desc->cmd_type_offset_bsz = 0;
xdp_ring->next_to_clean += xsk_frames;
if (xdp_ring->next_to_clean >= cnt)
xdp_ring->next_to_clean -= cnt;
if (xsk_frames)
xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
}
/**
* ice_xmit_xdp_tx_zc - AF_XDP ZC handler for XDP_TX
* @xdp: XDP buffer to xmit
* @xdp_ring: XDP ring to produce descriptor onto
*
* note that this function works directly on xdp_buff, no need to convert
* it to xdp_frame. xdp_buff pointer is stored to ice_tx_buf so that cleaning
* side will be able to xsk_buff_free() it.
*
* Returns ICE_XDP_TX for successfully produced desc, ICE_XDP_CONSUMED if there
* was not enough space on XDP ring
*/
static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp,
struct ice_tx_ring *xdp_ring)
{
u32 size = xdp->data_end - xdp->data;
u32 ntu = xdp_ring->next_to_use;
struct ice_tx_desc *tx_desc;
struct ice_tx_buf *tx_buf;
dma_addr_t dma;
if (ICE_DESC_UNUSED(xdp_ring) < ICE_RING_QUARTER(xdp_ring)) {
ice_clean_xdp_irq_zc(xdp_ring);
if (!ICE_DESC_UNUSED(xdp_ring)) {
xdp_ring->ring_stats->tx_stats.tx_busy++;
return ICE_XDP_CONSUMED;
}
}
dma = xsk_buff_xdp_get_dma(xdp);
xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size);
tx_buf = &xdp_ring->tx_buf[ntu];
tx_buf->xdp = xdp;
tx_desc = ICE_TX_DESC(xdp_ring, ntu);
tx_desc->buf_addr = cpu_to_le64(dma);
tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP,
0, size, 0);
xdp_ring->xdp_tx_active++;
if (++ntu == xdp_ring->count)
ntu = 0;
xdp_ring->next_to_use = ntu;
return ICE_XDP_TX;
}
/**
* ice_run_xdp_zc - Executes an XDP program in zero-copy path
* @rx_ring: Rx ring
......@@ -630,7 +731,7 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
case XDP_PASS:
break;
case XDP_TX:
result = ice_xmit_xdp_buff(xdp, xdp_ring);
result = ice_xmit_xdp_tx_zc(xdp, xdp_ring);
if (result == ICE_XDP_CONSUMED)
goto out_failure;
break;
......@@ -760,7 +861,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
if (entries_to_alloc > ICE_RING_QUARTER(rx_ring))
failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc);
ice_finalize_xdp_rx(xdp_ring, xdp_xmit);
ice_finalize_xdp_rx(xdp_ring, xdp_xmit, 0);
ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes);
if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) {
......@@ -775,75 +876,6 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
return failure ? budget : (int)total_rx_packets;
}
/**
* ice_clean_xdp_tx_buf - Free and unmap XDP Tx buffer
* @xdp_ring: XDP Tx ring
* @tx_buf: Tx buffer to clean
*/
static void
ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf)
{
page_frag_free(tx_buf->raw_buf);
xdp_ring->xdp_tx_active--;
dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma),
dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);
dma_unmap_len_set(tx_buf, len, 0);
}
/**
* ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ
* @xdp_ring: XDP Tx ring
*/
static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring)
{
u16 ntc = xdp_ring->next_to_clean;
struct ice_tx_desc *tx_desc;
u16 cnt = xdp_ring->count;
struct ice_tx_buf *tx_buf;
u16 xsk_frames = 0;
u16 last_rs;
int i;
last_rs = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : cnt - 1;
tx_desc = ICE_TX_DESC(xdp_ring, last_rs);
if ((tx_desc->cmd_type_offset_bsz &
cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) {
if (last_rs >= ntc)
xsk_frames = last_rs - ntc + 1;
else
xsk_frames = last_rs + cnt - ntc + 1;
}
if (!xsk_frames)
return;
if (likely(!xdp_ring->xdp_tx_active))
goto skip;
ntc = xdp_ring->next_to_clean;
for (i = 0; i < xsk_frames; i++) {
tx_buf = &xdp_ring->tx_buf[ntc];
if (tx_buf->raw_buf) {
ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
tx_buf->raw_buf = NULL;
} else {
xsk_frames++;
}
ntc++;
if (ntc >= xdp_ring->count)
ntc = 0;
}
skip:
tx_desc->cmd_type_offset_bsz = 0;
xdp_ring->next_to_clean += xsk_frames;
if (xdp_ring->next_to_clean >= cnt)
xdp_ring->next_to_clean -= cnt;
if (xsk_frames)
xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames);
}
/**
* ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor
* @xdp_ring: XDP ring to produce the HW Tx descriptor on
......@@ -917,20 +949,6 @@ static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *d
ice_xmit_pkt(xdp_ring, &descs[i], total_bytes);
}
/**
* ice_set_rs_bit - set RS bit on last produced descriptor (one behind current NTU)
* @xdp_ring: XDP ring to produce the HW Tx descriptors on
*/
static void ice_set_rs_bit(struct ice_tx_ring *xdp_ring)
{
u16 ntu = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1;
struct ice_tx_desc *tx_desc;
tx_desc = ICE_TX_DESC(xdp_ring, ntu);
tx_desc->cmd_type_offset_bsz |=
cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S);
}
/**
* ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring
* @xdp_ring: XDP ring to produce the HW Tx descriptors on
......@@ -1065,8 +1083,8 @@ void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring)
while (ntc != ntu) {
struct ice_tx_buf *tx_buf = &xdp_ring->tx_buf[ntc];
if (tx_buf->raw_buf)
ice_clean_xdp_tx_buf(xdp_ring, tx_buf);
if (tx_buf->xdp)
xsk_buff_free(tx_buf->xdp);
else
xsk_frames++;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment