Commit 9ead7e74 authored by Maciej Fijalkowski, committed by Tony Nguyen

ice: xsk: use Rx ring's XDP ring when picking NAPI context

The ice driver allocates per-CPU XDP queues so that the redirect path can
safely use smp_processor_id() as an index into the array. At the same
time, though, XDP rings are used to pick the NAPI context on which to
call napi_schedule() or set NAPIF_STATE_MISSED. When the user reduces the
queue count, say to 8, and num_possible_cpus() of the underlying platform
is 44, queue vectors with their correlated NAPI contexts will each carry
several XDP queues.

This in turn can result in broken behavior where the NAPI context of
interest is never scheduled and the AF_XDP socket does not process any
traffic.

To fix this, change the way XDP rings are assigned to Rx rings and use
this information later on when setting the ice_tx_ring::xsk_pool pointer.
For each Rx ring, grab the associated queue vector and walk through its
linked list of Tx rings. Once we stumble upon an XDP ring in it, assign
this ring to ice_rx_ring::xdp_ring.

Previous [0] approach of fixing this issue was for txonly scenario
because of the described grouping of XDP rings across queue vectors. So,
relying on Rx ring meant that NAPI context could be scheduled with a
queue vector without XDP ring with associated XSK pool.

[0]: https://lore.kernel.org/netdev/20220707161128.54215-1-maciej.fijalkowski@intel.com/

Fixes: 2d4238f5 ("ice: Add support for AF_XDP")
Fixes: 22bf877e ("ice: introduce XDP_TX fallback path")
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Tested-by: George Kuruvinakunnel <george.kuruvinakunnel@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
parent 5a42f112
...@@ -684,8 +684,8 @@ static inline void ice_set_ring_xdp(struct ice_tx_ring *ring) ...@@ -684,8 +684,8 @@ static inline void ice_set_ring_xdp(struct ice_tx_ring *ring)
* ice_xsk_pool - get XSK buffer pool bound to a ring * ice_xsk_pool - get XSK buffer pool bound to a ring
* @ring: Rx ring to use * @ring: Rx ring to use
* *
* Returns a pointer to xdp_umem structure if there is a buffer pool present, * Returns a pointer to xsk_buff_pool structure if there is a buffer pool
* NULL otherwise. * present, NULL otherwise.
*/ */
static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring)
{ {
...@@ -699,23 +699,33 @@ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) ...@@ -699,23 +699,33 @@ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring)
} }
/** /**
* ice_tx_xsk_pool - get XSK buffer pool bound to a ring * ice_tx_xsk_pool - assign XSK buff pool to XDP ring
* @ring: Tx ring to use * @vsi: pointer to VSI
* @qid: index of a queue to look at XSK buff pool presence
* *
* Returns a pointer to xdp_umem structure if there is a buffer pool present, * Sets XSK buff pool pointer on XDP ring.
* NULL otherwise. Tx equivalent of ice_xsk_pool. *
* XDP ring is picked from Rx ring, whereas Rx ring is picked based on provided
* queue id. Reason for doing so is that queue vectors might have assigned more
* than one XDP ring, e.g. when user reduced the queue count on netdev; Rx ring
* carries a pointer to one of these XDP rings for its own purposes, such as
* handling XDP_TX action, therefore we can piggyback here on the
* rx_ring->xdp_ring assignment that was done during XDP rings initialization.
*/ */
static inline struct xsk_buff_pool *ice_tx_xsk_pool(struct ice_tx_ring *ring) static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid)
{ {
struct ice_vsi *vsi = ring->vsi; struct ice_tx_ring *ring;
u16 qid;
qid = ring->q_index - vsi->alloc_txq; ring = vsi->rx_rings[qid]->xdp_ring;
if (!ring)
return;
if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) {
return NULL; ring->xsk_pool = NULL;
return;
}
return xsk_get_pool_from_qid(vsi->netdev, qid); ring->xsk_pool = xsk_get_pool_from_qid(vsi->netdev, qid);
} }
/** /**
......
...@@ -1986,8 +1986,8 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) ...@@ -1986,8 +1986,8 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
if (ret) if (ret)
return ret; return ret;
ice_for_each_xdp_txq(vsi, i) ice_for_each_rxq(vsi, i)
vsi->xdp_rings[i]->xsk_pool = ice_tx_xsk_pool(vsi->xdp_rings[i]); ice_tx_xsk_pool(vsi, i);
return ret; return ret;
} }
......
...@@ -2581,7 +2581,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) ...@@ -2581,7 +2581,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
if (ice_setup_tx_ring(xdp_ring)) if (ice_setup_tx_ring(xdp_ring))
goto free_xdp_rings; goto free_xdp_rings;
ice_set_ring_xdp(xdp_ring); ice_set_ring_xdp(xdp_ring);
xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring);
spin_lock_init(&xdp_ring->tx_lock); spin_lock_init(&xdp_ring->tx_lock);
for (j = 0; j < xdp_ring->count; j++) { for (j = 0; j < xdp_ring->count; j++) {
tx_desc = ICE_TX_DESC(xdp_ring, j); tx_desc = ICE_TX_DESC(xdp_ring, j);
...@@ -2589,13 +2588,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) ...@@ -2589,13 +2588,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
} }
} }
ice_for_each_rxq(vsi, i) {
if (static_key_enabled(&ice_xdp_locking_key))
vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq];
else
vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i];
}
return 0; return 0;
free_xdp_rings: free_xdp_rings:
...@@ -2685,6 +2677,23 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) ...@@ -2685,6 +2677,23 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
xdp_rings_rem -= xdp_rings_per_v; xdp_rings_rem -= xdp_rings_per_v;
} }
ice_for_each_rxq(vsi, i) {
if (static_key_enabled(&ice_xdp_locking_key)) {
vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq];
} else {
struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector;
struct ice_tx_ring *ring;
ice_for_each_tx_ring(ring, q_vector->tx) {
if (ice_ring_is_xdp(ring)) {
vsi->rx_rings[i]->xdp_ring = ring;
break;
}
}
}
ice_tx_xsk_pool(vsi, i);
}
/* omit the scheduler update if in reset path; XDP queues will be /* omit the scheduler update if in reset path; XDP queues will be
* taken into account at the end of ice_vsi_rebuild, where * taken into account at the end of ice_vsi_rebuild, where
* ice_cfg_vsi_lan is being called * ice_cfg_vsi_lan is being called
......
...@@ -243,7 +243,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) ...@@ -243,7 +243,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx)
if (err) if (err)
goto free_buf; goto free_buf;
ice_set_ring_xdp(xdp_ring); ice_set_ring_xdp(xdp_ring);
xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); ice_tx_xsk_pool(vsi, q_idx);
} }
err = ice_vsi_cfg_rxq(rx_ring); err = ice_vsi_cfg_rxq(rx_ring);
...@@ -359,7 +359,7 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) ...@@ -359,7 +359,7 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
if (if_running) { if (if_running) {
ret = ice_qp_ena(vsi, qid); ret = ice_qp_ena(vsi, qid);
if (!ret && pool_present) if (!ret && pool_present)
napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi); napi_schedule(&vsi->rx_rings[qid]->xdp_ring->q_vector->napi);
else if (ret) else if (ret)
netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret); netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
} }
...@@ -950,13 +950,13 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, ...@@ -950,13 +950,13 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id,
if (!ice_is_xdp_ena_vsi(vsi)) if (!ice_is_xdp_ena_vsi(vsi))
return -EINVAL; return -EINVAL;
if (queue_id >= vsi->num_txq) if (queue_id >= vsi->num_txq || queue_id >= vsi->num_rxq)
return -EINVAL; return -EINVAL;
if (!vsi->xdp_rings[queue_id]->xsk_pool) ring = vsi->rx_rings[queue_id]->xdp_ring;
return -EINVAL;
ring = vsi->xdp_rings[queue_id]; if (!ring->xsk_pool)
return -EINVAL;
/* The idea here is that if NAPI is running, mark a miss, so /* The idea here is that if NAPI is running, mark a miss, so
* it will run again. If not, trigger an interrupt and * it will run again. If not, trigger an interrupt and
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment