Commit 77890db1 authored by David S. Miller

Merge branch 'nxp-enetc-xdp'

Vladimir Oltean says:

====================
XDP for NXP ENETC

This series adds support to the enetc driver for the basic XDP primitives.
The ENETC is a network controller found inside the NXP LS1028A SoC,
which is a dual-core Cortex A72 device for industrial networking,
with the CPUs clocked at up to 1.3 GHz. On this platform, there are 4
ENETC ports and a 6-port embedded DSA switch, in a topology that looks
like this:

  +-------------------------------------------------------------------------+
  |                    +--------+ 1 Gbps (typically disabled)               |
  | ENETC PCI          |  ENETC |--------------------------+                |
  | Root Complex       | port 3 |-----------------------+  |                |
  | Integrated         +--------+                       |  |                |
  | Endpoint                                            |  |                |
  |                    +--------+ 2.5 Gbps              |  |                |
  |                    |  ENETC |--------------+        |  |                |
  |                    | port 2 |-----------+  |        |  |                |
  |                    +--------+           |  |        |  |                |
  |                                         |  |        |  |                |
  |                        +------------------------------------------------+
  |                        |             |  Felix |  |  Felix |             |
  |                        | Switch      | port 4 |  | port 5 |             |
  |                        |             +--------+  +--------+             |
  |                        |                                                |
  | +--------+  +--------+ | +--------+  +--------+  +--------+  +--------+ |
  | |  ENETC |  |  ENETC | | |  Felix |  |  Felix |  |  Felix |  |  Felix | |
  | | port 0 |  | port 1 | | | port 0 |  | port 1 |  | port 2 |  | port 3 | |
  +-------------------------------------------------------------------------+
         |          |             |           |            |          |
         v          v             v           v            v          v
       Up to      Up to                      Up to 4x 2.5Gbps
      2.5Gbps     1Gbps

The ENETC ports 2 and 3 can act as DSA masters for the embedded switch.
Because 4 out of the 6 externally-facing ports of the SoC are switch
ports, the most interesting use case for XDP on this device is in fact
XDP_TX on the 2.5Gbps DSA master.

Nonetheless, the results presented below are for IPv4 forwarding between
ENETC port 0 (eno0) and port 1 (eno1), both configured for 1Gbps.
There are two streams of IPv4/UDP datagrams with a frame length of 64
octets delivered at 100% port load to eno0 and to eno1. eno0 has a flow
steering rule to process the traffic on RX ring 0 (CPU 0), and eno1 has
a flow steering rule towards RX ring 1 (CPU 1).

For the IPFWD test, standard IP routing was enabled in the netns.
For the XDP_DROP test, the samples/bpf/xdp1 program was attached to both
eno0 and to eno1.
For the XDP_TX test, the samples/bpf/xdp2 program was attached to both
eno0 and to eno1.
For the XDP_REDIRECT test, the samples/bpf/xdp_redirect program was
attached once to the input of eno0/output of eno1, and twice to the
input of eno1/output of eno0.

Finally, the preliminary results are as follows:

        | IPFWD | XDP_TX | XDP_REDIRECT | XDP_DROP
--------+-------+--------+--------------+---------
kfps    | 761   | 2535   | 1735         | 2783
Gbps    | 0.51  | 1.71   | 1.17         | n/a

There is a strange phenomenon in my testing system where it appears that
one CPU processes more traffic than the other. I have not investigated
this too much. Also, the code might not be very well optimized (for
example, dma_sync_for_device is called with the full
ENETC_RXB_DMA_SIZE_XDP).

Design-wise, the ENETC is a PCI device with BD rings, so it uses the
MEM_TYPE_PAGE_SHARED memory model, as is typical of Intel drivers. The
strategy was to build upon the existing model that the driver uses, and
not change it too much. So you will see things like a separate NAPI poll
function for XDP.

I have only tested with PAGE_SIZE=4096, and since we split pages in
half, it means that MTU-sized frames are scatter/gather (the XDP
headroom + skb_shared_info only leaves us 1476 bytes of data per
buffer). This is sub-optimal, but I would rather keep it this way and
help speed up Lorenzo's series for S/G support through testing, rather
than change the enetc driver to use some other memory model like page_pool.
My code is already structured for S/G, and that works fine for XDP_DROP
and XDP_TX, just not for XDP_REDIRECT, even between two enetc ports.
So the S/G XDP_REDIRECT is stubbed out (the frames are dropped), but
obviously I would like to remove that limitation soon.
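
As a rough orientation for where that per-buffer limit comes from (my
arithmetic, not something the patches spell out): with XDP enabled, the
usable area of each half-page buffer is given by the
ENETC_RXB_DMA_SIZE_XDP definition added to enetc.h:

	/* half page (ENETC_RXB_TRUESIZE = 2048 bytes), minus the tailroom
	 * reserved for struct skb_shared_info, minus the 256 bytes of
	 * XDP_PACKET_HEADROOM reserved at the front of the buffer
	 */
	#define ENETC_RXB_DMA_SIZE_XDP \
		(SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - XDP_PACKET_HEADROOM)

which leaves well under 1500 bytes of payload per buffer, so an MTU-sized
frame necessarily spans two buffers.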

Please note that I am rather new to this kind of work (I am more of a
control path person), so I would appreciate feedback.

Enough talking, on to the patches.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 0d7a7b20 9d2b68cc
@@ -2,41 +2,71 @@
/* Copyright 2017-2019 NXP */
#include "enetc.h"
#include <linux/bpf_trace.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>

static struct sk_buff *enetc_tx_swbd_get_skb(struct enetc_tx_swbd *tx_swbd)
{
if (tx_swbd->is_xdp_tx || tx_swbd->is_xdp_redirect)
return NULL;

return tx_swbd->skb;
}

static struct xdp_frame *
enetc_tx_swbd_get_xdp_frame(struct enetc_tx_swbd *tx_swbd)
{
if (tx_swbd->is_xdp_redirect)
return tx_swbd->xdp_frame;

return NULL;
}

static void enetc_unmap_tx_buff(struct enetc_bdr *tx_ring,
struct enetc_tx_swbd *tx_swbd)
{
/* For XDP_TX, pages come from RX, whereas for the other contexts where
 * we have is_dma_page_set, those come from skb_frag_dma_map. We need
 * to match the DMA mapping length, so we need to differentiate those.
 */
if (tx_swbd->is_dma_page)
dma_unmap_page(tx_ring->dev, tx_swbd->dma,
tx_swbd->is_xdp_tx ? PAGE_SIZE : tx_swbd->len,
tx_swbd->dir);
else
dma_unmap_single(tx_ring->dev, tx_swbd->dma,
tx_swbd->len, tx_swbd->dir);
tx_swbd->dma = 0;
}

static void enetc_free_tx_frame(struct enetc_bdr *tx_ring,
struct enetc_tx_swbd *tx_swbd)
{
struct xdp_frame *xdp_frame = enetc_tx_swbd_get_xdp_frame(tx_swbd);
struct sk_buff *skb = enetc_tx_swbd_get_skb(tx_swbd);

if (tx_swbd->dma)
enetc_unmap_tx_buff(tx_ring, tx_swbd);

if (xdp_frame) {
xdp_return_frame(tx_swbd->xdp_frame);
tx_swbd->xdp_frame = NULL;
} else if (skb) {
dev_kfree_skb_any(skb);
tx_swbd->skb = NULL;
}
}

/* Let H/W know BD ring has been updated */
static void enetc_update_tx_ring_tail(struct enetc_bdr *tx_ring)
{
/* includes wmb() */
enetc_wr_reg_hot(tx_ring->tpir, tx_ring->next_to_use);
}
static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
int active_offloads)
{
@@ -67,6 +97,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
tx_swbd->dma = dma;
tx_swbd->len = len;
tx_swbd->is_dma_page = 0;
tx_swbd->dir = DMA_TO_DEVICE;
count++;

do_vlan = skb_vlan_tag_present(skb);
@@ -149,6 +180,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
tx_swbd->dma = dma;
tx_swbd->len = len;
tx_swbd->is_dma_page = 1;
tx_swbd->dir = DMA_TO_DEVICE;
count++;
}
@@ -157,6 +189,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
temp_bd.flags = flags;
*txbd = temp_bd;

tx_ring->tx_swbd[i].is_eof = true;
tx_ring->tx_swbd[i].skb = skb;

enetc_bdr_idx_inc(tx_ring, &i);
@@ -164,8 +197,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
skb_tx_timestamp(skb);

enetc_update_tx_ring_tail(tx_ring);

return count;
@@ -174,7 +206,7 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
do {
tx_swbd = &tx_ring->tx_swbd[i];
enetc_free_tx_frame(tx_ring, tx_swbd);
if (i == 0)
i = tx_ring->bd_count;
i--;
@@ -274,6 +306,25 @@ static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci)
return pi >= ci ? pi - ci : tx_ring->bd_count - ci + pi;
}
static bool enetc_page_reusable(struct page *page)
{
return (!page_is_pfmemalloc(page) && page_ref_count(page) == 1);
}
static void enetc_reuse_page(struct enetc_bdr *rx_ring,
struct enetc_rx_swbd *old)
{
struct enetc_rx_swbd *new;
new = &rx_ring->rx_swbd[rx_ring->next_to_alloc];
/* next buf that may reuse a page */
enetc_bdr_idx_inc(rx_ring, &rx_ring->next_to_alloc);
/* copy page reference */
*new = *old;
}
static void enetc_get_tx_tstamp(struct enetc_hw *hw, union enetc_tx_bd *txbd,
u64 *tstamp)
{
@@ -299,6 +350,43 @@ static void enetc_tstamp_tx(struct sk_buff *skb, u64 tstamp)
}
}
static void enetc_recycle_xdp_tx_buff(struct enetc_bdr *tx_ring,
struct enetc_tx_swbd *tx_swbd)
{
struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev);
struct enetc_bdr *rx_ring = priv->rx_ring[tx_ring->index];
struct enetc_rx_swbd rx_swbd = {
.dma = tx_swbd->dma,
.page = tx_swbd->page,
.page_offset = tx_swbd->page_offset,
.dir = tx_swbd->dir,
.len = tx_swbd->len,
};
if (likely(enetc_swbd_unused(rx_ring))) {
enetc_reuse_page(rx_ring, &rx_swbd);
/* sync for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, rx_swbd.dma,
rx_swbd.page_offset,
ENETC_RXB_DMA_SIZE_XDP,
rx_swbd.dir);
rx_ring->stats.recycles++;
} else {
/* RX ring is already full, we need to unmap and free the
* page, since there's nothing useful we can do with it.
*/
rx_ring->stats.recycle_failures++;
dma_unmap_page(rx_ring->dev, rx_swbd.dma, PAGE_SIZE,
rx_swbd.dir);
__free_page(rx_swbd.page);
}
rx_ring->xdp.xdp_tx_in_flight--;
}
static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
{
struct net_device *ndev = tx_ring->ndev;
@@ -316,7 +404,8 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
do_tstamp = false;

while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) {
struct xdp_frame *xdp_frame = enetc_tx_swbd_get_xdp_frame(tx_swbd);
struct sk_buff *skb = enetc_tx_swbd_get_skb(tx_swbd);

if (unlikely(tx_swbd->check_wb)) {
struct enetc_ndev_priv *priv = netdev_priv(ndev);
@@ -332,19 +421,28 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
}
}

if (tx_swbd->is_xdp_tx)
enetc_recycle_xdp_tx_buff(tx_ring, tx_swbd);
else if (likely(tx_swbd->dma))
enetc_unmap_tx_buff(tx_ring, tx_swbd);

if (xdp_frame) {
xdp_return_frame(xdp_frame);
tx_swbd->xdp_frame = NULL;
} else if (skb) {
if (unlikely(do_tstamp)) {
enetc_tstamp_tx(skb, tstamp);
do_tstamp = false;
}
napi_consume_skb(skb, napi_budget);
tx_swbd->skb = NULL;
}

tx_byte_cnt += tx_swbd->len;
/* Scrub the swbd here so we don't have to do that
 * when we reuse it during xmit
 */
memset(tx_swbd, 0, sizeof(*tx_swbd));

bds_to_clean--;
tx_swbd++;
@@ -355,7 +453,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
}

/* BD iteration loop end */
if (tx_swbd->is_eof) {
tx_frm_cnt++;
/* re-arm interrupt source */
enetc_wr_reg_hot(tx_ring->idr, BIT(tx_ring->index) |
@@ -382,6 +480,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
static bool enetc_new_page(struct enetc_bdr *rx_ring,
struct enetc_rx_swbd *rx_swbd)
{
bool xdp = !!(rx_ring->xdp.prog);
struct page *page;
dma_addr_t addr;
@@ -389,7 +488,10 @@ static bool enetc_new_page(struct enetc_bdr *rx_ring,
if (unlikely(!page))
return false;

/* For XDP_TX, we forgo dma_unmap -> dma_map */
rx_swbd->dir = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;

addr = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, rx_swbd->dir);
if (unlikely(dma_mapping_error(rx_ring->dev, addr))) {
__free_page(page);
@@ -398,7 +500,7 @@ static bool enetc_new_page(struct enetc_bdr *rx_ring,
rx_swbd->dma = addr;
rx_swbd->page = page;
rx_swbd->page_offset = rx_ring->buffer_offset;

return true;
}
@@ -513,32 +615,10 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring,
#endif
}
/* This gets called during the non-XDP NAPI poll cycle as well as on XDP_PASS,
 * so it needs to work with both DMA_FROM_DEVICE as well as DMA_BIDIRECTIONAL
 * mapped buffers.
 */
static struct enetc_rx_swbd *enetc_get_rx_buff(struct enetc_bdr *rx_ring,
int i, u16 size)
{
@@ -546,7 +626,7 @@ static struct enetc_rx_swbd *enetc_get_rx_buff(struct enetc_bdr *rx_ring,
dma_sync_single_range_for_cpu(rx_ring->dev, rx_swbd->dma,
rx_swbd->page_offset,
size, rx_swbd->dir);
return rx_swbd;
}
@@ -554,6 +634,8 @@ static void enetc_put_rx_buff(struct enetc_bdr *rx_ring,
struct enetc_rx_swbd *rx_swbd)
{
if (likely(enetc_page_reusable(rx_swbd->page))) {
size_t buffer_size = ENETC_RXB_TRUESIZE - rx_ring->buffer_offset;

rx_swbd->page_offset ^= ENETC_RXB_TRUESIZE;
page_ref_inc(rx_swbd->page);
@@ -562,11 +644,10 @@ static void enetc_put_rx_buff(struct enetc_bdr *rx_ring,
/* sync for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma,
rx_swbd->page_offset,
buffer_size, rx_swbd->dir);
} else {
dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE,
rx_swbd->dir);
}

rx_swbd->page = NULL;
@@ -580,13 +661,13 @@ static struct sk_buff *enetc_map_rx_buff_to_skb(struct enetc_bdr *rx_ring,
void *ba;

ba = page_address(rx_swbd->page) + rx_swbd->page_offset;
skb = build_skb(ba - rx_ring->buffer_offset, ENETC_RXB_TRUESIZE);
if (unlikely(!skb)) {
rx_ring->stats.rx_alloc_errs++;
return NULL;
}

skb_reserve(skb, rx_ring->buffer_offset);
__skb_put(skb, size);

enetc_put_rx_buff(rx_ring, rx_swbd);
@@ -605,6 +686,69 @@ static void enetc_add_rx_buff_to_skb(struct enetc_bdr *rx_ring, int i,
enetc_put_rx_buff(rx_ring, rx_swbd);
}
static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring,
u32 bd_status,
union enetc_rx_bd **rxbd, int *i)
{
if (likely(!(bd_status & ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))))
return false;
enetc_rxbd_next(rx_ring, rxbd, i);
while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
dma_rmb();
bd_status = le32_to_cpu((*rxbd)->r.lstatus);
enetc_rxbd_next(rx_ring, rxbd, i);
}
rx_ring->ndev->stats.rx_dropped++;
rx_ring->ndev->stats.rx_errors++;
return true;
}
static struct sk_buff *enetc_build_skb(struct enetc_bdr *rx_ring,
u32 bd_status, union enetc_rx_bd **rxbd,
int *i, int *cleaned_cnt, int buffer_size)
{
struct sk_buff *skb;
u16 size;
size = le16_to_cpu((*rxbd)->r.buf_len);
skb = enetc_map_rx_buff_to_skb(rx_ring, *i, size);
if (!skb)
return NULL;
enetc_get_offloads(rx_ring, *rxbd, skb);
(*cleaned_cnt)++;
enetc_rxbd_next(rx_ring, rxbd, i);
/* not last BD in frame? */
while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
bd_status = le32_to_cpu((*rxbd)->r.lstatus);
size = buffer_size;
if (bd_status & ENETC_RXBD_LSTATUS_F) {
dma_rmb();
size = le16_to_cpu((*rxbd)->r.buf_len);
}
enetc_add_rx_buff_to_skb(rx_ring, *i, size, skb);
(*cleaned_cnt)++;
enetc_rxbd_next(rx_ring, rxbd, i);
}
skb_record_rx_queue(skb, rx_ring->index);
skb->protocol = eth_type_trans(skb, rx_ring->ndev);
return skb;
}
#define ENETC_RXBD_BUNDLE 16 /* # of BDs to update at once */

static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
@@ -621,7 +765,6 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
union enetc_rx_bd *rxbd;
struct sk_buff *skb;
u32 bd_status;

if (cleaned_cnt >= ENETC_RXBD_BUNDLE)
cleaned_cnt -= enetc_refill_rx_ring(rx_ring,
@@ -634,55 +777,446 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index));

dma_rmb(); /* for reading other rxbd fields */

if (enetc_check_bd_errors_and_consume(rx_ring, bd_status,
&rxbd, &i))
break;

skb = enetc_build_skb(rx_ring, bd_status, &rxbd, &i,
&cleaned_cnt, ENETC_RXB_DMA_SIZE);
if (!skb)
break;

rx_byte_cnt += skb->len;
rx_frm_cnt++;

napi_gro_receive(napi, skb);
}

rx_ring->next_to_clean = i;

rx_ring->stats.packets += rx_frm_cnt;
rx_ring->stats.bytes += rx_byte_cnt;

return rx_frm_cnt;
}
static void enetc_xdp_map_tx_buff(struct enetc_bdr *tx_ring, int i,
struct enetc_tx_swbd *tx_swbd,
int frm_len)
{
union enetc_tx_bd *txbd = ENETC_TXBD(*tx_ring, i);
prefetchw(txbd);
enetc_clear_tx_bd(txbd);
txbd->addr = cpu_to_le64(tx_swbd->dma + tx_swbd->page_offset);
txbd->buf_len = cpu_to_le16(tx_swbd->len);
txbd->frm_len = cpu_to_le16(frm_len);
memcpy(&tx_ring->tx_swbd[i], tx_swbd, sizeof(*tx_swbd));
}
/* Puts in the TX ring one XDP frame, mapped as an array of TX software buffer
* descriptors.
*/
static bool enetc_xdp_tx(struct enetc_bdr *tx_ring,
struct enetc_tx_swbd *xdp_tx_arr, int num_tx_swbd)
{
struct enetc_tx_swbd *tmp_tx_swbd = xdp_tx_arr;
int i, k, frm_len = tmp_tx_swbd->len;
if (unlikely(enetc_bd_unused(tx_ring) < ENETC_TXBDS_NEEDED(num_tx_swbd)))
return false;
while (unlikely(!tmp_tx_swbd->is_eof)) {
tmp_tx_swbd++;
frm_len += tmp_tx_swbd->len;
}
i = tx_ring->next_to_use;
for (k = 0; k < num_tx_swbd; k++) {
struct enetc_tx_swbd *xdp_tx_swbd = &xdp_tx_arr[k];
enetc_xdp_map_tx_buff(tx_ring, i, xdp_tx_swbd, frm_len);
/* last BD needs 'F' bit set */
if (xdp_tx_swbd->is_eof) {
union enetc_tx_bd *txbd = ENETC_TXBD(*tx_ring, i);
txbd->flags = ENETC_TXBD_FLAGS_F;
}
enetc_bdr_idx_inc(tx_ring, &i);
}
tx_ring->next_to_use = i;
return true;
}
static int enetc_xdp_frame_to_xdp_tx_swbd(struct enetc_bdr *tx_ring,
struct enetc_tx_swbd *xdp_tx_arr,
struct xdp_frame *xdp_frame)
{
struct enetc_tx_swbd *xdp_tx_swbd = &xdp_tx_arr[0];
struct skb_shared_info *shinfo;
void *data = xdp_frame->data;
int len = xdp_frame->len;
skb_frag_t *frag;
dma_addr_t dma;
unsigned int f;
int n = 0;
dma = dma_map_single(tx_ring->dev, data, len, DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(tx_ring->dev, dma))) {
netdev_err(tx_ring->ndev, "DMA map error\n");
return -1;
}
xdp_tx_swbd->dma = dma;
xdp_tx_swbd->dir = DMA_TO_DEVICE;
xdp_tx_swbd->len = len;
xdp_tx_swbd->is_xdp_redirect = true;
xdp_tx_swbd->is_eof = false;
xdp_tx_swbd->xdp_frame = NULL;
n++;
xdp_tx_swbd = &xdp_tx_arr[n];
shinfo = xdp_get_shared_info_from_frame(xdp_frame);
for (f = 0, frag = &shinfo->frags[0]; f < shinfo->nr_frags;
f++, frag++) {
data = skb_frag_address(frag);
len = skb_frag_size(frag);
dma = dma_map_single(tx_ring->dev, data, len, DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(tx_ring->dev, dma))) {
/* Undo the DMA mapping for all fragments */
while (n-- >= 0)
enetc_unmap_tx_buff(tx_ring, &xdp_tx_arr[n]);
netdev_err(tx_ring->ndev, "DMA map error\n");
return -1;
}
xdp_tx_swbd->dma = dma;
xdp_tx_swbd->dir = DMA_TO_DEVICE;
xdp_tx_swbd->len = len;
xdp_tx_swbd->is_xdp_redirect = true;
xdp_tx_swbd->is_eof = false;
xdp_tx_swbd->xdp_frame = NULL;
n++;
xdp_tx_swbd = &xdp_tx_arr[n];
}
xdp_tx_arr[n - 1].is_eof = true;
xdp_tx_arr[n - 1].xdp_frame = xdp_frame;
return n;
}
int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
struct xdp_frame **frames, u32 flags)
{
struct enetc_tx_swbd xdp_redirect_arr[ENETC_MAX_SKB_FRAGS] = {0};
struct enetc_ndev_priv *priv = netdev_priv(ndev);
struct enetc_bdr *tx_ring;
int xdp_tx_bd_cnt, i, k;
int xdp_tx_frm_cnt = 0;
tx_ring = priv->tx_ring[smp_processor_id()];
prefetchw(ENETC_TXBD(*tx_ring, tx_ring->next_to_use));
for (k = 0; k < num_frames; k++) {
xdp_tx_bd_cnt = enetc_xdp_frame_to_xdp_tx_swbd(tx_ring,
xdp_redirect_arr,
frames[k]);
if (unlikely(xdp_tx_bd_cnt < 0))
break;
if (unlikely(!enetc_xdp_tx(tx_ring, xdp_redirect_arr,
xdp_tx_bd_cnt))) {
for (i = 0; i < xdp_tx_bd_cnt; i++)
enetc_unmap_tx_buff(tx_ring,
&xdp_redirect_arr[i]);
tx_ring->stats.xdp_tx_drops++;
break;
}

xdp_tx_frm_cnt++;
}

if (unlikely((flags & XDP_XMIT_FLUSH) || k != xdp_tx_frm_cnt))
enetc_update_tx_ring_tail(tx_ring);

tx_ring->stats.xdp_tx += xdp_tx_frm_cnt;

return xdp_tx_frm_cnt;
}
static void enetc_map_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i,
struct xdp_buff *xdp_buff, u16 size)
{
struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size);
void *hard_start = page_address(rx_swbd->page) + rx_swbd->page_offset;
struct skb_shared_info *shinfo;
/* To be used for XDP_TX */
rx_swbd->len = size;

xdp_prepare_buff(xdp_buff, hard_start - rx_ring->buffer_offset,
rx_ring->buffer_offset, size, false);

shinfo = xdp_get_shared_info_from_buff(xdp_buff);
shinfo->nr_frags = 0;
}
static void enetc_add_rx_buff_to_xdp(struct enetc_bdr *rx_ring, int i,
u16 size, struct xdp_buff *xdp_buff)
{
struct skb_shared_info *shinfo = xdp_get_shared_info_from_buff(xdp_buff);
struct enetc_rx_swbd *rx_swbd = enetc_get_rx_buff(rx_ring, i, size);
skb_frag_t *frag = &shinfo->frags[shinfo->nr_frags];
/* To be used for XDP_TX */
rx_swbd->len = size;
skb_frag_off_set(frag, rx_swbd->page_offset);
skb_frag_size_set(frag, size);
__skb_frag_set_page(frag, rx_swbd->page);
shinfo->nr_frags++;
}
static void enetc_build_xdp_buff(struct enetc_bdr *rx_ring, u32 bd_status,
union enetc_rx_bd **rxbd, int *i,
int *cleaned_cnt, struct xdp_buff *xdp_buff)
{
u16 size = le16_to_cpu((*rxbd)->r.buf_len);
xdp_init_buff(xdp_buff, ENETC_RXB_TRUESIZE, &rx_ring->xdp.rxq);
enetc_map_rx_buff_to_xdp(rx_ring, *i, xdp_buff, size);
(*cleaned_cnt)++;
enetc_rxbd_next(rx_ring, rxbd, i);
/* not last BD in frame? */
while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
bd_status = le32_to_cpu((*rxbd)->r.lstatus);
size = ENETC_RXB_DMA_SIZE_XDP;
if (bd_status & ENETC_RXBD_LSTATUS_F) {
dma_rmb();
size = le16_to_cpu((*rxbd)->r.buf_len);
} }
enetc_add_rx_buff_to_xdp(rx_ring, *i, size, xdp_buff);
(*cleaned_cnt)++;
enetc_rxbd_next(rx_ring, rxbd, i);
}
}
/* Reuse the current page without performing half-page buffer flipping */
static void enetc_put_xdp_buff(struct enetc_bdr *rx_ring,
struct enetc_rx_swbd *rx_swbd)
{
enetc_reuse_page(rx_ring, rx_swbd);
dma_sync_single_range_for_device(rx_ring->dev, rx_swbd->dma,
rx_swbd->page_offset,
ENETC_RXB_DMA_SIZE_XDP,
rx_swbd->dir);
rx_swbd->page = NULL;
}
/* Convert RX buffer descriptors to TX buffer descriptors. These will be
* recycled back into the RX ring in enetc_clean_tx_ring. We need to scrub the
* RX software BDs because the ownership of the buffer no longer belongs to the
* RX ring, so enetc_refill_rx_ring may not reuse rx_swbd->page.
*/
static int enetc_rx_swbd_to_xdp_tx_swbd(struct enetc_tx_swbd *xdp_tx_arr,
struct enetc_bdr *rx_ring,
int rx_ring_first, int rx_ring_last)
{
int n = 0;
for (; rx_ring_first != rx_ring_last;
n++, enetc_bdr_idx_inc(rx_ring, &rx_ring_first)) {
struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[rx_ring_first];
struct enetc_tx_swbd *tx_swbd = &xdp_tx_arr[n];
/* No need to dma_map, we already have DMA_BIDIRECTIONAL */
tx_swbd->dma = rx_swbd->dma;
tx_swbd->dir = rx_swbd->dir;
tx_swbd->page = rx_swbd->page;
tx_swbd->page_offset = rx_swbd->page_offset;
tx_swbd->len = rx_swbd->len;
tx_swbd->is_dma_page = true;
tx_swbd->is_xdp_tx = true;
tx_swbd->is_eof = false;
memset(rx_swbd, 0, sizeof(*rx_swbd));
}
/* We rely on caller providing an rx_ring_last > rx_ring_first */
xdp_tx_arr[n - 1].is_eof = true;
return n;
}
static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first,
int rx_ring_last)
{
while (rx_ring_first != rx_ring_last) {
enetc_put_xdp_buff(rx_ring,
&rx_ring->rx_swbd[rx_ring_first]);
enetc_bdr_idx_inc(rx_ring, &rx_ring_first);
}
rx_ring->stats.xdp_drops++;
}
static void enetc_xdp_free(struct enetc_bdr *rx_ring, int rx_ring_first,
int rx_ring_last)
{
while (rx_ring_first != rx_ring_last) {
struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[rx_ring_first];
if (rx_swbd->page) {
dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE,
rx_swbd->dir);
__free_page(rx_swbd->page);
rx_swbd->page = NULL;
}
enetc_bdr_idx_inc(rx_ring, &rx_ring_first);
}
rx_ring->stats.xdp_redirect_failures++;
}
static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
struct napi_struct *napi, int work_limit,
struct bpf_prog *prog)
{
int xdp_tx_bd_cnt, xdp_tx_frm_cnt = 0, xdp_redirect_frm_cnt = 0;
struct enetc_tx_swbd xdp_tx_arr[ENETC_MAX_SKB_FRAGS] = {0};
struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev);
struct enetc_bdr *tx_ring = priv->tx_ring[rx_ring->index];
int rx_frm_cnt = 0, rx_byte_cnt = 0;
int cleaned_cnt, i;
u32 xdp_act;
cleaned_cnt = enetc_bd_unused(rx_ring);
/* next descriptor to process */
i = rx_ring->next_to_clean;
while (likely(rx_frm_cnt < work_limit)) {
union enetc_rx_bd *rxbd, *orig_rxbd;
int orig_i, orig_cleaned_cnt;
struct xdp_buff xdp_buff;
struct sk_buff *skb;
int tmp_orig_i, err;
u32 bd_status;
rxbd = enetc_rxbd(rx_ring, i);
bd_status = le32_to_cpu(rxbd->r.lstatus);
if (!bd_status)
break;
enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index));
dma_rmb(); /* for reading other rxbd fields */
if (enetc_check_bd_errors_and_consume(rx_ring, bd_status,
&rxbd, &i))
break;
orig_rxbd = rxbd;
orig_cleaned_cnt = cleaned_cnt;
orig_i = i;
enetc_build_xdp_buff(rx_ring, bd_status, &rxbd, &i,
&cleaned_cnt, &xdp_buff);
xdp_act = bpf_prog_run_xdp(prog, &xdp_buff);
switch (xdp_act) {
case XDP_ABORTED:
trace_xdp_exception(rx_ring->ndev, prog, xdp_act);
fallthrough;
case XDP_DROP:
enetc_xdp_drop(rx_ring, orig_i, i);
break;
case XDP_PASS:
rxbd = orig_rxbd;
cleaned_cnt = orig_cleaned_cnt;
i = orig_i;
skb = enetc_build_skb(rx_ring, bd_status, &rxbd,
&i, &cleaned_cnt,
ENETC_RXB_DMA_SIZE_XDP);
if (unlikely(!skb))
/* Exit the switch/case, not the loop */
break;
napi_gro_receive(napi, skb);
break;
case XDP_TX:
xdp_tx_bd_cnt = enetc_rx_swbd_to_xdp_tx_swbd(xdp_tx_arr,
rx_ring,
orig_i, i);
if (!enetc_xdp_tx(tx_ring, xdp_tx_arr, xdp_tx_bd_cnt)) {
enetc_xdp_drop(rx_ring, orig_i, i);
tx_ring->stats.xdp_tx_drops++;
} else {
tx_ring->stats.xdp_tx += xdp_tx_bd_cnt;
rx_ring->xdp.xdp_tx_in_flight += xdp_tx_bd_cnt;
xdp_tx_frm_cnt++;
}
break;
case XDP_REDIRECT:
/* xdp_return_frame does not support S/G in the sense
* that it leaks the fragments (__xdp_return should not
* call page_frag_free only for the initial buffer).
* Until XDP_REDIRECT gains support for S/G let's keep
* the code structure in place, but dead. We drop the
* S/G frames ourselves to avoid memory leaks which
* would otherwise leave the kernel OOM.
*/
if (unlikely(cleaned_cnt - orig_cleaned_cnt != 1)) {
enetc_xdp_drop(rx_ring, orig_i, i);
rx_ring->stats.xdp_redirect_sg++;
break;
}
tmp_orig_i = orig_i;
while (orig_i != i) {
enetc_put_rx_buff(rx_ring,
&rx_ring->rx_swbd[orig_i]);
enetc_bdr_idx_inc(rx_ring, &orig_i);
}
err = xdp_do_redirect(rx_ring->ndev, &xdp_buff, prog);
if (unlikely(err)) {
enetc_xdp_free(rx_ring, tmp_orig_i, i);
} else {
xdp_redirect_frm_cnt++;
rx_ring->stats.xdp_redirect++;
}
if (unlikely(xdp_redirect_frm_cnt > ENETC_DEFAULT_TX_WORK)) {
xdp_do_flush_map();
xdp_redirect_frm_cnt = 0;
}
break;
default:
bpf_warn_invalid_xdp_action(xdp_act);
}
rx_frm_cnt++;
}
@@ -692,6 +1226,16 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
rx_ring->stats.packets += rx_frm_cnt;
rx_ring->stats.bytes += rx_byte_cnt;
if (xdp_redirect_frm_cnt)
xdp_do_flush_map();
if (xdp_tx_frm_cnt)
enetc_update_tx_ring_tail(tx_ring);
if (cleaned_cnt > rx_ring->xdp.xdp_tx_in_flight)
enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring) -
rx_ring->xdp.xdp_tx_in_flight);
return rx_frm_cnt;
}
@@ -699,6 +1243,8 @@ static int enetc_poll(struct napi_struct *napi, int budget)
{
struct enetc_int_vector
*v = container_of(napi, struct enetc_int_vector, napi);
struct enetc_bdr *rx_ring = &v->rx_ring;
struct bpf_prog *prog;
bool complete = true;
int work_done;
int i;
@@ -709,7 +1255,11 @@ static int enetc_poll(struct napi_struct *napi, int budget)
if (!enetc_clean_tx_ring(&v->tx_ring[i], budget))
complete = false;

prog = rx_ring->xdp.prog;
if (prog)
work_done = enetc_clean_rx_ring_xdp(rx_ring, napi, budget, prog);
else
work_done = enetc_clean_rx_ring(rx_ring, napi, budget);
if (work_done == budget)
complete = false;
if (work_done)
@@ -813,7 +1363,7 @@ static void enetc_free_txbdr(struct enetc_bdr *txr)
int size, i;

for (i = 0; i < txr->bd_count; i++)
enetc_free_tx_frame(txr, &txr->tx_swbd[i]);

size = txr->bd_count * sizeof(union enetc_tx_bd);
@@ -930,7 +1480,7 @@ static void enetc_free_tx_ring(struct enetc_bdr *tx_ring)
for (i = 0; i < tx_ring->bd_count; i++) {
struct enetc_tx_swbd *tx_swbd = &tx_ring->tx_swbd[i];

enetc_free_tx_frame(tx_ring, tx_swbd);
}

tx_ring->next_to_clean = 0;
@@ -950,8 +1500,8 @@ static void enetc_free_rx_ring(struct enetc_bdr *rx_ring)
if (!rx_swbd->page)
continue;

dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE,
rx_swbd->dir);
__free_page(rx_swbd->page);
rx_swbd->page = NULL;
}
@@ -1099,7 +1649,10 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
enetc_rxbdr_wr(hw, idx, ENETC_RBLENR,
ENETC_RTBLENR_LEN(rx_ring->bd_count));

if (rx_ring->xdp.prog)
enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, ENETC_RXB_DMA_SIZE_XDP);
else
enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, ENETC_RXB_DMA_SIZE);

enetc_rxbdr_wr(hw, idx, ENETC_RBPIR, 0);
@@ -1490,6 +2043,54 @@ int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
}
}
static int enetc_setup_xdp_prog(struct net_device *dev, struct bpf_prog *prog,
struct netlink_ext_ack *extack)
{
struct enetc_ndev_priv *priv = netdev_priv(dev);
struct bpf_prog *old_prog;
bool is_up;
int i;
/* The buffer layout is changing, so we need to drain the old
* RX buffers and seed new ones.
*/
is_up = netif_running(dev);
if (is_up)
dev_close(dev);
old_prog = xchg(&priv->xdp_prog, prog);
if (old_prog)
bpf_prog_put(old_prog);
for (i = 0; i < priv->num_rx_rings; i++) {
struct enetc_bdr *rx_ring = priv->rx_ring[i];
rx_ring->xdp.prog = prog;
if (prog)
rx_ring->buffer_offset = XDP_PACKET_HEADROOM;
else
rx_ring->buffer_offset = ENETC_RXB_PAD;
}
if (is_up)
return dev_open(dev, extack);
return 0;
}
int enetc_setup_bpf(struct net_device *dev, struct netdev_bpf *xdp)
{
switch (xdp->command) {
case XDP_SETUP_PROG:
return enetc_setup_xdp_prog(dev, xdp->prog, xdp->extack);
default:
return -EINVAL;
}
return 0;
}
struct net_device_stats *enetc_get_stats(struct net_device *ndev)
{
struct enetc_ndev_priv *priv = netdev_priv(ndev);
@@ -1706,6 +2307,28 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv)
priv->int_vector[i] = v;
bdr = &v->rx_ring;
bdr->index = i;
bdr->ndev = priv->ndev;
bdr->dev = priv->dev;
bdr->bd_count = priv->rx_bd_count;
bdr->buffer_offset = ENETC_RXB_PAD;
priv->rx_ring[i] = bdr;
err = xdp_rxq_info_reg(&bdr->xdp.rxq, priv->ndev, i, 0);
if (err) {
kfree(v);
goto fail;
}
err = xdp_rxq_info_reg_mem_model(&bdr->xdp.rxq,
MEM_TYPE_PAGE_SHARED, NULL);
if (err) {
xdp_rxq_info_unreg(&bdr->xdp.rxq);
kfree(v);
goto fail;
}
/* init defaults for adaptive IC */
if (priv->ic_mode & ENETC_IC_RX_ADAPTIVE) {
v->rx_ictt = 0x1;
@@ -1733,22 +2356,20 @@ int enetc_alloc_msix(struct enetc_ndev_priv *priv)
bdr->bd_count = priv->tx_bd_count;
priv->tx_ring[idx] = bdr;
}
}

return 0;

fail:
while (i--) {
struct enetc_int_vector *v = priv->int_vector[i];
struct enetc_bdr *rx_ring = &v->rx_ring;

xdp_rxq_info_unreg_mem_model(&rx_ring->xdp.rxq);
xdp_rxq_info_unreg(&rx_ring->xdp.rxq);
netif_napi_del(&v->napi);
cancel_work_sync(&v->rx_dim.work);
kfree(v);
}

pci_free_irq_vectors(pdev);
@@ -1762,7 +2383,10 @@ void enetc_free_msix(struct enetc_ndev_priv *priv)
for (i = 0; i < priv->bdr_int_num; i++) {
struct enetc_int_vector *v = priv->int_vector[i];
struct enetc_bdr *rx_ring = &v->rx_ring;

xdp_rxq_info_unreg_mem_model(&rx_ring->xdp.rxq);
xdp_rxq_info_unreg(&rx_ring->xdp.rxq);

netif_napi_del(&v->napi);
cancel_work_sync(&v->rx_dim.work);
}
......
@@ -19,12 +19,21 @@
(ETH_FCS_LEN + ETH_HLEN + VLAN_HLEN))

struct enetc_tx_swbd {
union {
struct sk_buff *skb;
struct xdp_frame *xdp_frame;
};
dma_addr_t dma;
struct page *page; /* valid only if is_xdp_tx */
u16 page_offset; /* valid only if is_xdp_tx */
u16 len;
enum dma_data_direction dir;
u8 is_dma_page:1;
u8 check_wb:1;
u8 do_tstamp:1;
u8 is_eof:1;
u8 is_xdp_tx:1;
u8 is_xdp_redirect:1;
};

#define ENETC_RX_MAXFRM_SIZE ENETC_MAC_MAXFRM_SIZE
@@ -32,20 +41,44 @@ struct enetc_tx_swbd {
#define ENETC_RXB_PAD NET_SKB_PAD /* add extra space if needed */
#define ENETC_RXB_DMA_SIZE \
(SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - ENETC_RXB_PAD)
#define ENETC_RXB_DMA_SIZE_XDP \
(SKB_WITH_OVERHEAD(ENETC_RXB_TRUESIZE) - XDP_PACKET_HEADROOM)

struct enetc_rx_swbd {
dma_addr_t dma;
struct page *page;
u16 page_offset;
enum dma_data_direction dir;
u16 len;
};
/* ENETC overhead: optional extension BD + 1 BD gap */
#define ENETC_TXBDS_NEEDED(val) ((val) + 2)
/* max # of chained Tx BDs is 15, including head and extension BD */
#define ENETC_MAX_SKB_FRAGS 13
#define ENETC_TXBDS_MAX_NEEDED ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1)
struct enetc_ring_stats {
unsigned int packets;
unsigned int bytes;
unsigned int rx_alloc_errs;
unsigned int xdp_drops;
unsigned int xdp_tx;
unsigned int xdp_tx_drops;
unsigned int xdp_redirect;
unsigned int xdp_redirect_failures;
unsigned int xdp_redirect_sg;
unsigned int recycles;
unsigned int recycle_failures;
};
struct enetc_xdp_data {
struct xdp_rxq_info rxq;
struct bpf_prog *prog;
int xdp_tx_in_flight;
};
#define ENETC_RX_RING_DEFAULT_SIZE 2048
#define ENETC_TX_RING_DEFAULT_SIZE 256
#define ENETC_DEFAULT_TX_WORK (ENETC_TX_RING_DEFAULT_SIZE / 2)
@@ -71,6 +104,9 @@ struct enetc_bdr {
};
void __iomem *idr; /* Interrupt Detect Register pointer */

int buffer_offset;
struct enetc_xdp_data xdp;

struct enetc_ring_stats stats;

dma_addr_t bd_dma_base;
@@ -92,6 +128,14 @@ static inline int enetc_bd_unused(struct enetc_bdr *bdr)
return bdr->bd_count + bdr->next_to_clean - bdr->next_to_use - 1;
}
static inline int enetc_swbd_unused(struct enetc_bdr *bdr)
{
if (bdr->next_to_clean > bdr->next_to_alloc)
return bdr->next_to_clean - bdr->next_to_alloc - 1;
return bdr->bd_count + bdr->next_to_clean - bdr->next_to_alloc - 1;
}
/* Control BD ring */
#define ENETC_CBDR_DEFAULT_SIZE 64
struct enetc_cbdr {
@@ -275,6 +319,8 @@ struct enetc_ndev_priv {
struct phylink *phylink;
int ic_mode;
u32 tx_ictt;

struct bpf_prog *xdp_prog;
};

/* Messaging */
@@ -314,6 +360,9 @@ int enetc_set_features(struct net_device *ndev,
int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd);
int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
void *type_data);
int enetc_setup_bpf(struct net_device *dev, struct netdev_bpf *xdp);
int enetc_xdp_xmit(struct net_device *ndev, int num_frames,
struct xdp_frame **frames, u32 flags);
/* ethtool */
void enetc_set_ethtool_ops(struct net_device *ndev);
......
@@ -192,10 +192,18 @@ static const struct {
static const char rx_ring_stats[][ETH_GSTRING_LEN] = {
"Rx ring %2d frames",
"Rx ring %2d alloc errors",
"Rx ring %2d XDP drops",
"Rx ring %2d recycles",
"Rx ring %2d recycle failures",
"Rx ring %2d redirects",
"Rx ring %2d redirect failures",
"Rx ring %2d redirect S/G",
};

static const char tx_ring_stats[][ETH_GSTRING_LEN] = {
"Tx ring %2d frames",
"Tx ring %2d XDP frames",
"Tx ring %2d XDP drops",
};
static int enetc_get_sset_count(struct net_device *ndev, int sset)
@@ -267,12 +275,21 @@ static void enetc_get_ethtool_stats(struct net_device *ndev,
for (i = 0; i < ARRAY_SIZE(enetc_si_counters); i++)
data[o++] = enetc_rd64(hw, enetc_si_counters[i].reg);

for (i = 0; i < priv->num_tx_rings; i++) {
data[o++] = priv->tx_ring[i]->stats.packets;
data[o++] = priv->tx_ring[i]->stats.xdp_tx;
data[o++] = priv->tx_ring[i]->stats.xdp_tx_drops;
}

for (i = 0; i < priv->num_rx_rings; i++) {
data[o++] = priv->rx_ring[i]->stats.packets;
data[o++] = priv->rx_ring[i]->stats.rx_alloc_errs;
data[o++] = priv->rx_ring[i]->stats.xdp_drops;
data[o++] = priv->rx_ring[i]->stats.recycles;
data[o++] = priv->rx_ring[i]->stats.recycle_failures;
data[o++] = priv->rx_ring[i]->stats.xdp_redirect;
data[o++] = priv->rx_ring[i]->stats.xdp_redirect_failures;
data[o++] = priv->rx_ring[i]->stats.xdp_redirect_sg;
}

if (!enetc_si_is_pf(priv->si))
......
@@ -707,6 +707,8 @@ static const struct net_device_ops enetc_ndev_ops = {
.ndo_set_features = enetc_pf_set_features,
.ndo_do_ioctl = enetc_ioctl,
.ndo_setup_tc = enetc_setup_tc,
.ndo_bpf = enetc_setup_bpf,
.ndo_xdp_xmit = enetc_xdp_xmit,
};

static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
......