Commit 74608d17 authored by Björn Töpel's avatar Björn Töpel Committed by Jeff Kirsher

i40e: add support for XDP_TX action

This patch adds proper XDP_TX action support. For each Tx ring, an
additional XDP Tx ring is allocated and setup. This version does the
DMA mapping in the fast-path, which will penalize performance for
IOMMU enabled systems. Further, debugfs support is not wired up for
the XDP Tx rings.
Signed-off-by: default avatarBjörn Töpel <bjorn.topel@intel.com>
Tested-by: default avatarAndrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: default avatarJeff Kirsher <jeffrey.t.kirsher@intel.com>
parent 0c8493d9
......@@ -629,6 +629,7 @@ struct i40e_vsi {
/* These are containers of ring pointers, allocated at run-time */
struct i40e_ring **rx_rings;
struct i40e_ring **tx_rings;
struct i40e_ring **xdp_rings; /* XDP Tx rings */
u32 active_filters;
u32 promisc_threshold;
......
......@@ -1299,6 +1299,17 @@ static void i40e_get_ringparam(struct net_device *netdev,
ring->rx_jumbo_pending = 0;
}
static bool i40e_active_tx_ring_index(struct i40e_vsi *vsi, u16 index)
{
if (i40e_enabled_xdp_vsi(vsi)) {
return index < vsi->num_queue_pairs ||
(index >= vsi->alloc_queue_pairs &&
index < vsi->alloc_queue_pairs + vsi->num_queue_pairs);
}
return index < vsi->num_queue_pairs;
}
static int i40e_set_ringparam(struct net_device *netdev,
struct ethtool_ringparam *ring)
{
......@@ -1308,6 +1319,7 @@ static int i40e_set_ringparam(struct net_device *netdev,
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
u32 new_rx_count, new_tx_count;
u16 tx_alloc_queue_pairs;
int timeout = 50;
int i, err = 0;
......@@ -1345,6 +1357,8 @@ static int i40e_set_ringparam(struct net_device *netdev,
for (i = 0; i < vsi->num_queue_pairs; i++) {
vsi->tx_rings[i]->count = new_tx_count;
vsi->rx_rings[i]->count = new_rx_count;
if (i40e_enabled_xdp_vsi(vsi))
vsi->xdp_rings[i]->count = new_tx_count;
}
goto done;
}
......@@ -1354,20 +1368,24 @@ static int i40e_set_ringparam(struct net_device *netdev,
* to the Tx and Rx ring structs.
*/
/* alloc updated Tx resources */
/* alloc updated Tx and XDP Tx resources */
tx_alloc_queue_pairs = vsi->alloc_queue_pairs *
(i40e_enabled_xdp_vsi(vsi) ? 2 : 1);
if (new_tx_count != vsi->tx_rings[0]->count) {
netdev_info(netdev,
"Changing Tx descriptor count from %d to %d.\n",
vsi->tx_rings[0]->count, new_tx_count);
tx_rings = kcalloc(vsi->alloc_queue_pairs,
tx_rings = kcalloc(tx_alloc_queue_pairs,
sizeof(struct i40e_ring), GFP_KERNEL);
if (!tx_rings) {
err = -ENOMEM;
goto done;
}
for (i = 0; i < vsi->num_queue_pairs; i++) {
/* clone ring and setup updated count */
for (i = 0; i < tx_alloc_queue_pairs; i++) {
if (!i40e_active_tx_ring_index(vsi, i))
continue;
tx_rings[i] = *vsi->tx_rings[i];
tx_rings[i].count = new_tx_count;
/* the desc and bi pointers will be reallocated in the
......@@ -1379,6 +1397,8 @@ static int i40e_set_ringparam(struct net_device *netdev,
if (err) {
while (i) {
i--;
if (!i40e_active_tx_ring_index(vsi, i))
continue;
i40e_free_tx_resources(&tx_rings[i]);
}
kfree(tx_rings);
......@@ -1446,9 +1466,11 @@ static int i40e_set_ringparam(struct net_device *netdev,
i40e_down(vsi);
if (tx_rings) {
for (i = 0; i < vsi->num_queue_pairs; i++) {
i40e_free_tx_resources(vsi->tx_rings[i]);
*vsi->tx_rings[i] = tx_rings[i];
for (i = 0; i < tx_alloc_queue_pairs; i++) {
if (i40e_active_tx_ring_index(vsi, i)) {
i40e_free_tx_resources(vsi->tx_rings[i]);
*vsi->tx_rings[i] = tx_rings[i];
}
}
kfree(tx_rings);
tx_rings = NULL;
......@@ -1479,8 +1501,10 @@ static int i40e_set_ringparam(struct net_device *netdev,
free_tx:
/* error cleanup if the Rx allocations failed after getting Tx */
if (tx_rings) {
for (i = 0; i < vsi->num_queue_pairs; i++)
i40e_free_tx_resources(&tx_rings[i]);
for (i = 0; i < tx_alloc_queue_pairs; i++) {
if (i40e_active_tx_ring_index(vsi, i))
i40e_free_tx_resources(vsi->tx_rings[i]);
}
kfree(tx_rings);
tx_rings = NULL;
}
......
This diff is collapsed.
......@@ -630,6 +630,8 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
if (tx_buffer->skb) {
if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
kfree(tx_buffer->raw_buf);
else if (ring_is_xdp(ring))
page_frag_free(tx_buffer->raw_buf);
else
dev_kfree_skb_any(tx_buffer->skb);
if (dma_unmap_len(tx_buffer, len))
......@@ -771,8 +773,11 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
total_bytes += tx_buf->bytecount;
total_packets += tx_buf->gso_segs;
/* free the skb */
napi_consume_skb(tx_buf->skb, napi_budget);
/* free the skb/XDP data */
if (ring_is_xdp(tx_ring))
page_frag_free(tx_buf->raw_buf);
else
napi_consume_skb(tx_buf->skb, napi_budget);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
......@@ -848,6 +853,9 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
tx_ring->arm_wb = true;
}
if (ring_is_xdp(tx_ring))
return !!budget;
/* notify netdev of completed buffers */
netdev_tx_completed_queue(txring_txq(tx_ring),
total_packets, total_bytes);
......@@ -1969,6 +1977,10 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
#define I40E_XDP_PASS 0
#define I40E_XDP_CONSUMED 1
#define I40E_XDP_TX 2
static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
struct i40e_ring *xdp_ring);
/**
* i40e_run_xdp - run an XDP program
......@@ -1979,6 +1991,7 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
struct xdp_buff *xdp)
{
int result = I40E_XDP_PASS;
struct i40e_ring *xdp_ring;
struct bpf_prog *xdp_prog;
u32 act;
......@@ -1992,9 +2005,12 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
switch (act) {
case XDP_PASS:
break;
case XDP_TX:
xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
result = i40e_xmit_xdp_ring(xdp, xdp_ring);
break;
default:
bpf_warn_invalid_xdp_action(act);
case XDP_TX:
case XDP_ABORTED:
trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
/* fallthrough -- handle aborts by dropping packet */
......@@ -2007,6 +2023,27 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
return ERR_PTR(-result);
}
/**
* i40e_rx_buffer_flip - adjusted rx_buffer to point to an unused region
* @rx_ring: Rx ring
* @rx_buffer: Rx buffer to adjust
* @size: Size of adjustment
**/
static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring,
struct i40e_rx_buffer *rx_buffer,
unsigned int size)
{
#if (PAGE_SIZE < 8192)
unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
rx_buffer->page_offset ^= truesize;
#else
unsigned int truesize = SKB_DATA_ALIGN(i40e_rx_offset(rx_ring) + size);
rx_buffer->page_offset += truesize;
#endif
}
/**
* i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
* @rx_ring: rx descriptor ring to transact packets on
......@@ -2024,7 +2061,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
struct sk_buff *skb = rx_ring->skb;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
bool failure = false;
bool failure = false, xdp_xmit = false;
while (likely(total_rx_packets < budget)) {
struct i40e_rx_buffer *rx_buffer;
......@@ -2081,9 +2118,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
}
if (IS_ERR(skb)) {
if (PTR_ERR(skb) == -I40E_XDP_TX) {
xdp_xmit = true;
i40e_rx_buffer_flip(rx_ring, rx_buffer, size);
} else {
rx_buffer->pagecnt_bias++;
}
total_rx_bytes += size;
total_rx_packets++;
rx_buffer->pagecnt_bias++;
} else if (skb) {
i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
} else if (ring_uses_build_skb(rx_ring)) {
......@@ -2131,6 +2173,19 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
total_rx_packets++;
}
if (xdp_xmit) {
struct i40e_ring *xdp_ring;
xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch.
*/
wmb();
writel(xdp_ring->next_to_use, xdp_ring->tail);
}
rx_ring->skb = skb;
u64_stats_update_begin(&rx_ring->syncp);
......@@ -3187,6 +3242,59 @@ static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
return -1;
}
/**
* i40e_xmit_xdp_ring - transmits an XDP buffer to an XDP Tx ring
* @xdp: data to transmit
* @xdp_ring: XDP Tx ring
**/
static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
struct i40e_ring *xdp_ring)
{
u32 size = xdp->data_end - xdp->data;
u16 i = xdp_ring->next_to_use;
struct i40e_tx_buffer *tx_bi;
struct i40e_tx_desc *tx_desc;
dma_addr_t dma;
if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) {
xdp_ring->tx_stats.tx_busy++;
return I40E_XDP_CONSUMED;
}
dma = dma_map_single(xdp_ring->dev, xdp->data, size, DMA_TO_DEVICE);
if (dma_mapping_error(xdp_ring->dev, dma))
return I40E_XDP_CONSUMED;
tx_bi = &xdp_ring->tx_bi[i];
tx_bi->bytecount = size;
tx_bi->gso_segs = 1;
tx_bi->raw_buf = xdp->data;
/* record length, and DMA address */
dma_unmap_len_set(tx_bi, len, size);
dma_unmap_addr_set(tx_bi, dma, dma);
tx_desc = I40E_TX_DESC(xdp_ring, i);
tx_desc->buffer_addr = cpu_to_le64(dma);
tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC
| I40E_TXD_CMD,
0, size, 0);
/* Make certain all of the status bits have been updated
* before next_to_watch is written.
*/
smp_wmb();
i++;
if (i == xdp_ring->count)
i = 0;
tx_bi->next_to_watch = tx_desc;
xdp_ring->next_to_use = i;
return I40E_XDP_TX;
}
/**
* i40e_xmit_frame_ring - Sends buffer on Tx ring
* @skb: send buffer
......
......@@ -396,6 +396,7 @@ struct i40e_ring {
u16 flags;
#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0)
#define I40E_RXR_FLAGS_BUILD_SKB_ENABLED BIT(1)
#define I40E_TXR_FLAGS_XDP BIT(2)
/* stats structs */
struct i40e_queue_stats stats;
......@@ -438,6 +439,16 @@ static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring)
ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED;
}
static inline bool ring_is_xdp(struct i40e_ring *ring)
{
return !!(ring->flags & I40E_TXR_FLAGS_XDP);
}
static inline void set_ring_xdp(struct i40e_ring *ring)
{
ring->flags |= I40E_TXR_FLAGS_XDP;
}
enum i40e_latency_range {
I40E_LOWEST_LATENCY = 0,
I40E_LOW_LATENCY = 1,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment