Commit 9610a8dc authored by Jakub Kicinski

Merge branch 'tsnep-xdp-socket-zero-copy-support'

Gerhard Engleder says:

====================
tsnep: XDP socket zero-copy support

Implement XDP socket zero-copy support for the tsnep driver. I tried to
follow existing drivers like igc as far as possible. But one main
difference is that tsnep does not need any reconfiguration for XDP BPF
program setup. So I decided to keep this behavior no matter whether an
XSK pool is used or not. As a result, tsnep starts using the XSK pool
even if no XDP BPF program is available.

Another difference is that I tried to prevent potentially failing
allocations during XSK pool setup. E.g., both memory models, for the
page pool and for the XSK pool, are registered all the time. Thus, XSK
pool setup cannot end up with non-working queues.

Some prework is done to reduce the last two XSK commits to actual XSK
changes.
====================

Link: https://lore.kernel.org/r/20230421194656.48063-1-gerhard@engleder-embedded.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 938f65ad cd275c23
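The poll-path selection described in the cover letter comes down to a single pointer check in the NAPI poll routine; below is a minimal sketch of that idea (the wrapper name tsnep_rx_do_poll is made up here for illustration, the other names are the ones used in the patch):

/* Sketch only: the RX processing path is chosen from the XSK pool pointer,
 * independent of whether an XDP BPF program is attached. The wrapper name
 * tsnep_rx_do_poll is hypothetical; tsnep_rx_poll_zc()/tsnep_rx_poll() are
 * the functions added/kept by this series.
 */
static int tsnep_rx_do_poll(struct tsnep_rx *rx, struct napi_struct *napi,
			    int budget)
{
	return rx->xsk_pool ? tsnep_rx_poll_zc(rx, napi, budget) :
			      tsnep_rx_poll(rx, napi, budget);
}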
@@ -18,6 +18,7 @@
#define TSNEP "tsnep"
#define TSNEP_RING_SIZE 256
+#define TSNEP_RING_MASK (TSNEP_RING_SIZE - 1)
#define TSNEP_RING_RX_REFILL 16
#define TSNEP_RING_RX_REUSE (TSNEP_RING_SIZE - TSNEP_RING_SIZE / 4)
#define TSNEP_RING_ENTRIES_PER_PAGE (PAGE_SIZE / TSNEP_DESC_SIZE)
@@ -69,6 +70,7 @@ struct tsnep_tx_entry {
	union {
		struct sk_buff *skb;
		struct xdp_frame *xdpf;
+		bool zc;
	};
	size_t len;
	DEFINE_DMA_UNMAP_ADDR(dma);
@@ -87,6 +89,7 @@ struct tsnep_tx {
	int read;
	u32 owner_counter;
	int increment_owner_counter;
+	struct xsk_buff_pool *xsk_pool;
	u32 packets;
	u32 bytes;
@@ -100,7 +103,10 @@ struct tsnep_rx_entry {
	u32 properties;
-	struct page *page;
+	union {
+		struct page *page;
+		struct xdp_buff *xdp;
+	};
	size_t len;
	dma_addr_t dma;
};
@@ -120,6 +126,9 @@ struct tsnep_rx {
	u32 owner_counter;
	int increment_owner_counter;
	struct page_pool *page_pool;
+	struct page **page_buffer;
+	struct xsk_buff_pool *xsk_pool;
+	struct xdp_buff **xdp_batch;
	u32 packets;
	u32 bytes;
@@ -128,6 +137,7 @@ struct tsnep_rx {
	u32 alloc_failed;
	struct xdp_rxq_info xdp_rxq;
+	struct xdp_rxq_info xdp_rxq_zc;
};
struct tsnep_queue {
@@ -213,6 +223,8 @@ int tsnep_rxnfc_del_rule(struct tsnep_adapter *adapter,
int tsnep_xdp_setup_prog(struct tsnep_adapter *adapter, struct bpf_prog *prog,
			 struct netlink_ext_ack *extack);
+int tsnep_xdp_setup_pool(struct tsnep_adapter *adapter,
+			 struct xsk_buff_pool *pool, u16 queue_id);
#if IS_ENABLED(CONFIG_TSNEP_SELFTESTS)
int tsnep_ethtool_get_test_count(void);
@@ -241,5 +253,7 @@ static inline void tsnep_ethtool_self_test(struct net_device *dev,
void tsnep_get_system_time(struct tsnep_adapter *adapter, u64 *time);
int tsnep_set_irq_coalesce(struct tsnep_queue *queue, u32 usecs);
u32 tsnep_get_irq_coalesce(struct tsnep_queue *queue);
+int tsnep_enable_xsk(struct tsnep_queue *queue, struct xsk_buff_pool *pool);
+void tsnep_disable_xsk(struct tsnep_queue *queue);
#endif /* _TSNEP_H */
@@ -28,11 +28,16 @@
#include <linux/iopoll.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
+#include <net/xdp_sock_drv.h>
#define TSNEP_RX_OFFSET (max(NET_SKB_PAD, XDP_PACKET_HEADROOM) + NET_IP_ALIGN)
#define TSNEP_HEADROOM ALIGN(TSNEP_RX_OFFSET, 4)
#define TSNEP_MAX_RX_BUF_SIZE (PAGE_SIZE - TSNEP_HEADROOM - \
			       SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+/* XSK buffer shall store at least Q-in-Q frame */
+#define TSNEP_XSK_RX_BUF_SIZE (ALIGN(TSNEP_RX_INLINE_METADATA_SIZE + \
+				     ETH_FRAME_LEN + ETH_FCS_LEN + \
+				     VLAN_HLEN * 2, 4))
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
#define DMA_ADDR_HIGH(dma_addr) ((u32)(((dma_addr) >> 32) & 0xFFFFFFFF))
@@ -49,6 +54,8 @@
#define TSNEP_TX_TYPE_SKB_FRAG	BIT(1)
#define TSNEP_TX_TYPE_XDP_TX	BIT(2)
#define TSNEP_TX_TYPE_XDP_NDO	BIT(3)
+#define TSNEP_TX_TYPE_XDP	(TSNEP_TX_TYPE_XDP_TX | TSNEP_TX_TYPE_XDP_NDO)
+#define TSNEP_TX_TYPE_XSK	BIT(4)
#define TSNEP_XDP_TX BIT(0)
#define TSNEP_XDP_REDIRECT BIT(1)
@@ -265,7 +272,7 @@ static void tsnep_tx_ring_cleanup(struct tsnep_tx *tx)
	}
}
-static int tsnep_tx_ring_init(struct tsnep_tx *tx)
+static int tsnep_tx_ring_create(struct tsnep_tx *tx)
{
	struct device *dmadev = tx->adapter->dmadev;
	struct tsnep_tx_entry *entry;
@@ -288,11 +295,12 @@ static int tsnep_tx_ring_init(struct tsnep_tx *tx)
			entry->desc = (struct tsnep_tx_desc *)
				(((u8 *)entry->desc_wb) + TSNEP_DESC_OFFSET);
			entry->desc_dma = tx->page_dma[i] + TSNEP_DESC_SIZE * j;
+			entry->owner_user_flag = false;
		}
	}
	for (i = 0; i < TSNEP_RING_SIZE; i++) {
		entry = &tx->entry[i];
-		next_entry = &tx->entry[(i + 1) % TSNEP_RING_SIZE];
+		next_entry = &tx->entry[(i + 1) & TSNEP_RING_MASK];
		entry->desc->next = __cpu_to_le64(next_entry->desc_dma);
	}
@@ -303,13 +311,60 @@ static int tsnep_tx_ring_init(struct tsnep_tx *tx)
	return retval;
}
+static void tsnep_tx_init(struct tsnep_tx *tx)
+{
+	dma_addr_t dma;
+	dma = tx->entry[0].desc_dma | TSNEP_RESET_OWNER_COUNTER;
+	iowrite32(DMA_ADDR_LOW(dma), tx->addr + TSNEP_TX_DESC_ADDR_LOW);
+	iowrite32(DMA_ADDR_HIGH(dma), tx->addr + TSNEP_TX_DESC_ADDR_HIGH);
+	tx->write = 0;
+	tx->read = 0;
+	tx->owner_counter = 1;
+	tx->increment_owner_counter = TSNEP_RING_SIZE - 1;
+}
+static void tsnep_tx_enable(struct tsnep_tx *tx)
+{
+	struct netdev_queue *nq;
+	nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);
+	__netif_tx_lock_bh(nq);
+	netif_tx_wake_queue(nq);
+	__netif_tx_unlock_bh(nq);
+}
+static void tsnep_tx_disable(struct tsnep_tx *tx, struct napi_struct *napi)
+{
+	struct netdev_queue *nq;
+	u32 val;
+	nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index);
+	__netif_tx_lock_bh(nq);
+	netif_tx_stop_queue(nq);
+	__netif_tx_unlock_bh(nq);
+	/* wait until TX is done in hardware */
+	readx_poll_timeout(ioread32, tx->addr + TSNEP_CONTROL, val,
+			   ((val & TSNEP_CONTROL_TX_ENABLE) == 0), 10000,
+			   1000000);
+	/* wait until TX is also done in software */
+	while (READ_ONCE(tx->read) != tx->write) {
+		napi_schedule(napi);
+		napi_synchronize(napi);
+	}
+}
static void tsnep_tx_activate(struct tsnep_tx *tx, int index, int length,
			      bool last)
{
	struct tsnep_tx_entry *entry = &tx->entry[index];
	entry->properties = 0;
-	/* xdpf is union with skb */
+	/* xdpf and zc are union with skb */
	if (entry->skb) {
		entry->properties = length & TSNEP_DESC_LENGTH_MASK;
		entry->properties |= TSNEP_DESC_INTERRUPT_FLAG;
@@ -381,7 +436,7 @@ static int tsnep_tx_map(struct sk_buff *skb, struct tsnep_tx *tx, int count)
	int i;
	for (i = 0; i < count; i++) {
-		entry = &tx->entry[(tx->write + i) % TSNEP_RING_SIZE];
+		entry = &tx->entry[(tx->write + i) & TSNEP_RING_MASK];
		if (!i) {
			len = skb_headlen(skb);
@@ -419,7 +474,7 @@ static int tsnep_tx_unmap(struct tsnep_tx *tx, int index, int count)
	int i;
	for (i = 0; i < count; i++) {
-		entry = &tx->entry[(index + i) % TSNEP_RING_SIZE];
+		entry = &tx->entry[(index + i) & TSNEP_RING_MASK];
		if (entry->len) {
			if (entry->type & TSNEP_TX_TYPE_SKB)
@@ -481,9 +536,9 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb,
		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
	for (i = 0; i < count; i++)
-		tsnep_tx_activate(tx, (tx->write + i) % TSNEP_RING_SIZE, length,
+		tsnep_tx_activate(tx, (tx->write + i) & TSNEP_RING_MASK, length,
				  i == count - 1);
-	tx->write = (tx->write + count) % TSNEP_RING_SIZE;
+	tx->write = (tx->write + count) & TSNEP_RING_MASK;
	skb_tx_timestamp(skb);
@@ -516,7 +571,7 @@ static int tsnep_xdp_tx_map(struct xdp_frame *xdpf, struct tsnep_tx *tx,
	frag = NULL;
	len = xdpf->len;
	for (i = 0; i < count; i++) {
-		entry = &tx->entry[(tx->write + i) % TSNEP_RING_SIZE];
+		entry = &tx->entry[(tx->write + i) & TSNEP_RING_MASK];
		if (type & TSNEP_TX_TYPE_XDP_NDO) {
			data = unlikely(frag) ? skb_frag_address(frag) :
				xdpf->data;
@@ -589,9 +644,9 @@ static bool tsnep_xdp_xmit_frame_ring(struct xdp_frame *xdpf,
	length = retval;
	for (i = 0; i < count; i++)
-		tsnep_tx_activate(tx, (tx->write + i) % TSNEP_RING_SIZE, length,
+		tsnep_tx_activate(tx, (tx->write + i) & TSNEP_RING_MASK, length,
				  i == count - 1);
-	tx->write = (tx->write + count) % TSNEP_RING_SIZE;
+	tx->write = (tx->write + count) & TSNEP_RING_MASK;
	/* descriptor properties shall be valid before hardware is notified */
	dma_wmb();
@@ -627,10 +682,69 @@ static bool tsnep_xdp_xmit_back(struct tsnep_adapter *adapter,
	return xmit;
}
+static int tsnep_xdp_tx_map_zc(struct xdp_desc *xdpd, struct tsnep_tx *tx)
+{
+	struct tsnep_tx_entry *entry;
+	dma_addr_t dma;
+	entry = &tx->entry[tx->write];
+	entry->zc = true;
+	dma = xsk_buff_raw_get_dma(tx->xsk_pool, xdpd->addr);
+	xsk_buff_raw_dma_sync_for_device(tx->xsk_pool, dma, xdpd->len);
+	entry->type = TSNEP_TX_TYPE_XSK;
+	entry->len = xdpd->len;
+	entry->desc->tx = __cpu_to_le64(dma);
+	return xdpd->len;
+}
+static void tsnep_xdp_xmit_frame_ring_zc(struct xdp_desc *xdpd,
+					 struct tsnep_tx *tx)
+{
+	int length;
+	length = tsnep_xdp_tx_map_zc(xdpd, tx);
+	tsnep_tx_activate(tx, tx->write, length, true);
+	tx->write = (tx->write + 1) & TSNEP_RING_MASK;
+}
+static void tsnep_xdp_xmit_zc(struct tsnep_tx *tx)
+{
+	int desc_available = tsnep_tx_desc_available(tx);
+	struct xdp_desc *descs = tx->xsk_pool->tx_descs;
+	int batch, i;
+	/* ensure that TX ring is not filled up by XDP, always MAX_SKB_FRAGS
+	 * will be available for normal TX path and queue is stopped there if
+	 * necessary
+	 */
+	if (desc_available <= (MAX_SKB_FRAGS + 1))
+		return;
+	desc_available -= MAX_SKB_FRAGS + 1;
+	batch = xsk_tx_peek_release_desc_batch(tx->xsk_pool, desc_available);
+	for (i = 0; i < batch; i++)
+		tsnep_xdp_xmit_frame_ring_zc(&descs[i], tx);
+	if (batch) {
+		/* descriptor properties shall be valid before hardware is
+		 * notified
+		 */
+		dma_wmb();
+		tsnep_xdp_xmit_flush(tx);
+	}
+}
static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
{
	struct tsnep_tx_entry *entry;
	struct netdev_queue *nq;
+	int xsk_frames = 0;
	int budget = 128;
	int length;
	int count;
@@ -657,7 +771,7 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
		if ((entry->type & TSNEP_TX_TYPE_SKB) &&
		    skb_shinfo(entry->skb)->nr_frags > 0)
			count += skb_shinfo(entry->skb)->nr_frags;
-		else if (!(entry->type & TSNEP_TX_TYPE_SKB) &&
+		else if ((entry->type & TSNEP_TX_TYPE_XDP) &&
			 xdp_frame_has_frags(entry->xdpf))
			count += xdp_get_shared_info_from_frame(entry->xdpf)->nr_frags;
@@ -686,12 +800,14 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
		if (entry->type & TSNEP_TX_TYPE_SKB)
			napi_consume_skb(entry->skb, napi_budget);
-		else
+		else if (entry->type & TSNEP_TX_TYPE_XDP)
			xdp_return_frame_rx_napi(entry->xdpf);
+		else
+			xsk_frames++;
-		/* xdpf is union with skb */
+		/* xdpf and zc are union with skb */
		entry->skb = NULL;
-		tx->read = (tx->read + count) % TSNEP_RING_SIZE;
+		tx->read = (tx->read + count) & TSNEP_RING_MASK;
		tx->packets++;
		tx->bytes += length + ETH_FCS_LEN;
@@ -699,6 +815,14 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget)
		budget--;
	} while (likely(budget));
+	if (tx->xsk_pool) {
+		if (xsk_frames)
+			xsk_tx_completed(tx->xsk_pool, xsk_frames);
+		if (xsk_uses_need_wakeup(tx->xsk_pool))
+			xsk_set_tx_need_wakeup(tx->xsk_pool);
+		tsnep_xdp_xmit_zc(tx);
+	}
	if ((tsnep_tx_desc_available(tx) >= ((MAX_SKB_FRAGS + 1) * 2)) &&
	    netif_tx_queue_stopped(nq)) {
		netif_tx_wake_queue(nq);
@@ -731,38 +855,21 @@ static bool tsnep_tx_pending(struct tsnep_tx *tx)
	return pending;
}
-static int tsnep_tx_open(struct tsnep_adapter *adapter, void __iomem *addr,
-			 int queue_index, struct tsnep_tx *tx)
+static int tsnep_tx_open(struct tsnep_tx *tx)
{
-	dma_addr_t dma;
	int retval;
-	memset(tx, 0, sizeof(*tx));
-	tx->adapter = adapter;
-	tx->addr = addr;
-	tx->queue_index = queue_index;
-	retval = tsnep_tx_ring_init(tx);
+	retval = tsnep_tx_ring_create(tx);
	if (retval)
		return retval;
-	dma = tx->entry[0].desc_dma | TSNEP_RESET_OWNER_COUNTER;
-	iowrite32(DMA_ADDR_LOW(dma), tx->addr + TSNEP_TX_DESC_ADDR_LOW);
-	iowrite32(DMA_ADDR_HIGH(dma), tx->addr + TSNEP_TX_DESC_ADDR_HIGH);
-	tx->owner_counter = 1;
-	tx->increment_owner_counter = TSNEP_RING_SIZE - 1;
+	tsnep_tx_init(tx);
	return 0;
}
static void tsnep_tx_close(struct tsnep_tx *tx)
{
-	u32 val;
-	readx_poll_timeout(ioread32, tx->addr + TSNEP_CONTROL, val,
-			   ((val & TSNEP_CONTROL_TX_ENABLE) == 0), 10000,
-			   1000000);
	tsnep_tx_ring_cleanup(tx);
}
@@ -774,9 +881,12 @@ static void tsnep_rx_ring_cleanup(struct tsnep_rx *rx)
	for (i = 0; i < TSNEP_RING_SIZE; i++) {
		entry = &rx->entry[i];
-		if (entry->page)
+		if (!rx->xsk_pool && entry->page)
			page_pool_put_full_page(rx->page_pool, entry->page,
						false);
+		if (rx->xsk_pool && entry->xdp)
+			xsk_buff_free(entry->xdp);
+		/* xdp is union with page */
		entry->page = NULL;
	}
@@ -795,7 +905,7 @@ static void tsnep_rx_ring_cleanup(struct tsnep_rx *rx)
	}
}
-static int tsnep_rx_ring_init(struct tsnep_rx *rx)
+static int tsnep_rx_ring_create(struct tsnep_rx *rx)
{
	struct device *dmadev = rx->adapter->dmadev;
	struct tsnep_rx_entry *entry;
@@ -839,7 +949,7 @@ static int tsnep_rx_ring_init(struct tsnep_rx *rx)
	for (i = 0; i < TSNEP_RING_SIZE; i++) {
		entry = &rx->entry[i];
-		next_entry = &rx->entry[(i + 1) % TSNEP_RING_SIZE];
+		next_entry = &rx->entry[(i + 1) & TSNEP_RING_MASK];
		entry->desc->next = __cpu_to_le64(next_entry->desc_dma);
	}
@@ -850,6 +960,37 @@ static int tsnep_rx_ring_init(struct tsnep_rx *rx)
	return retval;
}
+static void tsnep_rx_init(struct tsnep_rx *rx)
+{
+	dma_addr_t dma;
+	dma = rx->entry[0].desc_dma | TSNEP_RESET_OWNER_COUNTER;
+	iowrite32(DMA_ADDR_LOW(dma), rx->addr + TSNEP_RX_DESC_ADDR_LOW);
+	iowrite32(DMA_ADDR_HIGH(dma), rx->addr + TSNEP_RX_DESC_ADDR_HIGH);
+	rx->write = 0;
+	rx->read = 0;
+	rx->owner_counter = 1;
+	rx->increment_owner_counter = TSNEP_RING_SIZE - 1;
+}
+static void tsnep_rx_enable(struct tsnep_rx *rx)
+{
+	/* descriptor properties shall be valid before hardware is notified */
+	dma_wmb();
+	iowrite32(TSNEP_CONTROL_RX_ENABLE, rx->addr + TSNEP_CONTROL);
+}
+static void tsnep_rx_disable(struct tsnep_rx *rx)
+{
+	u32 val;
+	iowrite32(TSNEP_CONTROL_RX_DISABLE, rx->addr + TSNEP_CONTROL);
+	readx_poll_timeout(ioread32, rx->addr + TSNEP_CONTROL, val,
+			   ((val & TSNEP_CONTROL_RX_ENABLE) == 0), 10000,
+			   1000000);
+}
static int tsnep_rx_desc_available(struct tsnep_rx *rx)
{
	if (rx->read <= rx->write)
@@ -858,6 +999,40 @@ static int tsnep_rx_desc_available(struct tsnep_rx *rx)
	return rx->read - rx->write - 1;
}
+static void tsnep_rx_free_page_buffer(struct tsnep_rx *rx)
+{
+	struct page **page;
+	/* last entry of page_buffer is always zero, because ring cannot be
+	 * filled completely
+	 */
+	page = rx->page_buffer;
+	while (*page) {
+		page_pool_put_full_page(rx->page_pool, *page, false);
+		*page = NULL;
+		page++;
+	}
+}
+static int tsnep_rx_alloc_page_buffer(struct tsnep_rx *rx)
+{
+	int i;
+	/* alloc for all ring entries except the last one, because ring cannot
+	 * be filled completely
+	 */
+	for (i = 0; i < TSNEP_RING_SIZE - 1; i++) {
+		rx->page_buffer[i] = page_pool_dev_alloc_pages(rx->page_pool);
+		if (!rx->page_buffer[i]) {
+			tsnep_rx_free_page_buffer(rx);
+			return -ENOMEM;
+		}
+	}
+	return 0;
+}
static void tsnep_rx_set_page(struct tsnep_rx *rx, struct tsnep_rx_entry *entry,
			      struct page *page)
{
@@ -893,7 +1068,7 @@ static void tsnep_rx_activate(struct tsnep_rx *rx, int index)
{
	struct tsnep_rx_entry *entry = &rx->entry[index];
-	/* TSNEP_MAX_RX_BUF_SIZE is a multiple of 4 */
+	/* TSNEP_MAX_RX_BUF_SIZE and TSNEP_XSK_RX_BUF_SIZE are multiple of 4 */
	entry->properties = entry->len & TSNEP_DESC_LENGTH_MASK;
	entry->properties |= TSNEP_DESC_INTERRUPT_FLAG;
	if (index == rx->increment_owner_counter) {
@@ -916,19 +1091,15 @@ static void tsnep_rx_activate(struct tsnep_rx *rx, int index)
	entry->desc->properties = __cpu_to_le32(entry->properties);
}
-static int tsnep_rx_refill(struct tsnep_rx *rx, int count, bool reuse)
+static int tsnep_rx_alloc(struct tsnep_rx *rx, int count, bool reuse)
{
-	int index;
	bool alloc_failed = false;
-	bool enable = false;
-	int i;
-	int retval;
+	int i, index;
	for (i = 0; i < count && !alloc_failed; i++) {
-		index = (rx->write + i) % TSNEP_RING_SIZE;
-		retval = tsnep_rx_alloc_buffer(rx, index);
-		if (unlikely(retval)) {
+		index = (rx->write + i) & TSNEP_RING_MASK;
+		if (unlikely(tsnep_rx_alloc_buffer(rx, index))) {
			rx->alloc_failed++;
			alloc_failed = true;
@@ -940,24 +1111,95 @@ static int tsnep_rx_refill(struct tsnep_rx *rx, int count, bool reuse)
		}
		tsnep_rx_activate(rx, index);
-		enable = true;
	}
-	if (enable) {
-		rx->write = (rx->write + i) % TSNEP_RING_SIZE;
-		/* descriptor properties shall be valid before hardware is
-		 * notified
-		 */
-		dma_wmb();
-		iowrite32(TSNEP_CONTROL_RX_ENABLE, rx->addr + TSNEP_CONTROL);
-	}
+	if (i)
+		rx->write = (rx->write + i) & TSNEP_RING_MASK;
	return i;
}
+static int tsnep_rx_refill(struct tsnep_rx *rx, int count, bool reuse)
+{
+	int desc_refilled;
+	desc_refilled = tsnep_rx_alloc(rx, count, reuse);
+	if (desc_refilled)
+		tsnep_rx_enable(rx);
+	return desc_refilled;
+}
+static void tsnep_rx_set_xdp(struct tsnep_rx *rx, struct tsnep_rx_entry *entry,
+			     struct xdp_buff *xdp)
+{
+	entry->xdp = xdp;
+	entry->len = TSNEP_XSK_RX_BUF_SIZE;
+	entry->dma = xsk_buff_xdp_get_dma(entry->xdp);
+	entry->desc->rx = __cpu_to_le64(entry->dma);
+}
+static void tsnep_rx_reuse_buffer_zc(struct tsnep_rx *rx, int index)
+{
+	struct tsnep_rx_entry *entry = &rx->entry[index];
+	struct tsnep_rx_entry *read = &rx->entry[rx->read];
+	tsnep_rx_set_xdp(rx, entry, read->xdp);
+	read->xdp = NULL;
+}
+static int tsnep_rx_alloc_zc(struct tsnep_rx *rx, int count, bool reuse)
+{
+	u32 allocated;
+	int i;
+	allocated = xsk_buff_alloc_batch(rx->xsk_pool, rx->xdp_batch, count);
+	for (i = 0; i < allocated; i++) {
+		int index = (rx->write + i) & TSNEP_RING_MASK;
+		struct tsnep_rx_entry *entry = &rx->entry[index];
+		tsnep_rx_set_xdp(rx, entry, rx->xdp_batch[i]);
+		tsnep_rx_activate(rx, index);
+	}
+	if (i == 0) {
+		rx->alloc_failed++;
+		if (reuse) {
+			tsnep_rx_reuse_buffer_zc(rx, rx->write);
+			tsnep_rx_activate(rx, rx->write);
+		}
+	}
+	if (i)
+		rx->write = (rx->write + i) & TSNEP_RING_MASK;
+	return i;
+}
+static void tsnep_rx_free_zc(struct tsnep_rx *rx)
+{
+	int i;
+	for (i = 0; i < TSNEP_RING_SIZE; i++) {
+		struct tsnep_rx_entry *entry = &rx->entry[i];
+		if (entry->xdp)
+			xsk_buff_free(entry->xdp);
+		entry->xdp = NULL;
+	}
+}
+static int tsnep_rx_refill_zc(struct tsnep_rx *rx, int count, bool reuse)
+{
+	int desc_refilled;
+	desc_refilled = tsnep_rx_alloc_zc(rx, count, reuse);
+	if (desc_refilled)
+		tsnep_rx_enable(rx);
+	return desc_refilled;
+}
static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
			       struct xdp_buff *xdp, int *status,
			       struct netdev_queue *tx_nq, struct tsnep_tx *tx)
@@ -969,11 +1211,6 @@ static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
	length = xdp->data_end - xdp->data_hard_start - XDP_PACKET_HEADROOM;
	act = bpf_prog_run_xdp(prog, xdp);
-	/* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
-	sync = xdp->data_end - xdp->data_hard_start - XDP_PACKET_HEADROOM;
-	sync = max(sync, length);
	switch (act) {
	case XDP_PASS:
		return false;
@@ -995,12 +1232,56 @@ static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
		trace_xdp_exception(rx->adapter->netdev, prog, act);
		fallthrough;
	case XDP_DROP:
+		/* Due xdp_adjust_tail: DMA sync for_device cover max len CPU
+		 * touch
+		 */
+		sync = xdp->data_end - xdp->data_hard_start -
+		       XDP_PACKET_HEADROOM;
+		sync = max(sync, length);
		page_pool_put_page(rx->page_pool, virt_to_head_page(xdp->data),
				   sync, true);
		return true;
	}
}
+static bool tsnep_xdp_run_prog_zc(struct tsnep_rx *rx, struct bpf_prog *prog,
+				  struct xdp_buff *xdp, int *status,
+				  struct netdev_queue *tx_nq,
+				  struct tsnep_tx *tx)
+{
+	u32 act;
+	act = bpf_prog_run_xdp(prog, xdp);
+	/* XDP_REDIRECT is the main action for zero-copy */
+	if (likely(act == XDP_REDIRECT)) {
+		if (xdp_do_redirect(rx->adapter->netdev, xdp, prog) < 0)
+			goto out_failure;
+		*status |= TSNEP_XDP_REDIRECT;
+		return true;
+	}
+	switch (act) {
+	case XDP_PASS:
+		return false;
+	case XDP_TX:
+		if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx))
+			goto out_failure;
+		*status |= TSNEP_XDP_TX;
+		return true;
+	default:
+		bpf_warn_invalid_xdp_action(rx->adapter->netdev, prog, act);
+		fallthrough;
+	case XDP_ABORTED:
+out_failure:
+		trace_xdp_exception(rx->adapter->netdev, prog, act);
+		fallthrough;
+	case XDP_DROP:
+		xsk_buff_free(xdp);
+		return true;
+	}
+}
static void tsnep_finalize_xdp(struct tsnep_adapter *adapter, int status,
			       struct netdev_queue *tx_nq, struct tsnep_tx *tx)
{
@@ -1045,6 +1326,28 @@ static struct sk_buff *tsnep_build_skb(struct tsnep_rx *rx, struct page *page,
	return skb;
}
+static void tsnep_rx_page(struct tsnep_rx *rx, struct napi_struct *napi,
+			  struct page *page, int length)
+{
+	struct sk_buff *skb;
+	skb = tsnep_build_skb(rx, page, length);
+	if (skb) {
+		page_pool_release_page(rx->page_pool, page);
+		rx->packets++;
+		rx->bytes += length;
+		if (skb->pkt_type == PACKET_MULTICAST)
+			rx->multicast++;
+		napi_gro_receive(napi, skb);
+	} else {
+		page_pool_recycle_direct(rx->page_pool, page);
+		rx->dropped++;
+	}
+}
static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
			 int budget)
{
@@ -1054,7 +1357,6 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
	struct netdev_queue *tx_nq;
	struct bpf_prog *prog;
	struct xdp_buff xdp;
-	struct sk_buff *skb;
	struct tsnep_tx *tx;
	int desc_available;
	int xdp_status = 0;
@@ -1090,7 +1392,7 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
			 * empty RX ring, thus buffer cannot be used for
			 * RX processing
			 */
-			rx->read = (rx->read + 1) % TSNEP_RING_SIZE;
+			rx->read = (rx->read + 1) & TSNEP_RING_MASK;
			desc_available++;
			rx->dropped++;
@@ -1117,7 +1419,7 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
		 */
		length -= TSNEP_RX_INLINE_METADATA_SIZE;
-		rx->read = (rx->read + 1) % TSNEP_RING_SIZE;
+		rx->read = (rx->read + 1) & TSNEP_RING_MASK;
		desc_available++;
		if (prog) {
@@ -1139,31 +1441,135 @@ static int tsnep_rx_poll(struct tsnep_rx *rx, struct napi_struct *napi,
			}
		}
-		skb = tsnep_build_skb(rx, entry->page, length);
-		if (skb) {
-			page_pool_release_page(rx->page_pool, entry->page);
-			rx->packets++;
-			rx->bytes += length;
-			if (skb->pkt_type == PACKET_MULTICAST)
-				rx->multicast++;
-			napi_gro_receive(napi, skb);
-		} else {
-			page_pool_recycle_direct(rx->page_pool, entry->page);
-			rx->dropped++;
-		}
+		tsnep_rx_page(rx, napi, entry->page, length);
		entry->page = NULL;
	}
	if (xdp_status)
		tsnep_finalize_xdp(rx->adapter, xdp_status, tx_nq, tx);
	if (desc_available)
		tsnep_rx_refill(rx, desc_available, false);
	return done;
}
+static int tsnep_rx_poll_zc(struct tsnep_rx *rx, struct napi_struct *napi,
+			    int budget)
+{
+	struct tsnep_rx_entry *entry;
+	struct netdev_queue *tx_nq;
+	struct bpf_prog *prog;
+	struct tsnep_tx *tx;
+	int desc_available;
+	int xdp_status = 0;
+	struct page *page;
+	int done = 0;
+	int length;
+	desc_available = tsnep_rx_desc_available(rx);
+	prog = READ_ONCE(rx->adapter->xdp_prog);
+	if (prog) {
+		tx_nq = netdev_get_tx_queue(rx->adapter->netdev,
+					    rx->tx_queue_index);
+		tx = &rx->adapter->tx[rx->tx_queue_index];
+	}
+	while (likely(done < budget) && (rx->read != rx->write)) {
+		entry = &rx->entry[rx->read];
+		if ((__le32_to_cpu(entry->desc_wb->properties) &
+		     TSNEP_DESC_OWNER_COUNTER_MASK) !=
+		    (entry->properties & TSNEP_DESC_OWNER_COUNTER_MASK))
+			break;
+		done++;
+		if (desc_available >= TSNEP_RING_RX_REFILL) {
+			bool reuse = desc_available >= TSNEP_RING_RX_REUSE;
+			desc_available -= tsnep_rx_refill_zc(rx, desc_available,
+							     reuse);
+			if (!entry->xdp) {
+				/* buffer has been reused for refill to prevent
+				 * empty RX ring, thus buffer cannot be used for
+				 * RX processing
+				 */
+				rx->read = (rx->read + 1) & TSNEP_RING_MASK;
+				desc_available++;
+				rx->dropped++;
+				continue;
+			}
+		}
+		/* descriptor properties shall be read first, because valid data
+		 * is signaled there
+		 */
+		dma_rmb();
+		prefetch(entry->xdp->data);
+		length = __le32_to_cpu(entry->desc_wb->properties) &
+			 TSNEP_DESC_LENGTH_MASK;
+		xsk_buff_set_size(entry->xdp, length);
+		xsk_buff_dma_sync_for_cpu(entry->xdp, rx->xsk_pool);
+		/* RX metadata with timestamps is in front of actual data,
+		 * subtract metadata size to get length of actual data and
+		 * consider metadata size as offset of actual data during RX
+		 * processing
+		 */
+		length -= TSNEP_RX_INLINE_METADATA_SIZE;
+		rx->read = (rx->read + 1) & TSNEP_RING_MASK;
+		desc_available++;
+		if (prog) {
+			bool consume;
+			entry->xdp->data += TSNEP_RX_INLINE_METADATA_SIZE;
+			entry->xdp->data_meta += TSNEP_RX_INLINE_METADATA_SIZE;
+			consume = tsnep_xdp_run_prog_zc(rx, prog, entry->xdp,
+							&xdp_status, tx_nq, tx);
+			if (consume) {
+				rx->packets++;
+				rx->bytes += length;
+				entry->xdp = NULL;
+				continue;
+			}
+		}
+		page = page_pool_dev_alloc_pages(rx->page_pool);
+		if (page) {
+			memcpy(page_address(page) + TSNEP_RX_OFFSET,
+			       entry->xdp->data - TSNEP_RX_INLINE_METADATA_SIZE,
+			       length + TSNEP_RX_INLINE_METADATA_SIZE);
+			tsnep_rx_page(rx, napi, page, length);
+		} else {
+			rx->dropped++;
+		}
+		xsk_buff_free(entry->xdp);
+		entry->xdp = NULL;
+	}
+	if (xdp_status)
+		tsnep_finalize_xdp(rx->adapter, xdp_status, tx_nq, tx);
+	if (desc_available)
+		desc_available -= tsnep_rx_refill_zc(rx, desc_available, false);
+	if (xsk_uses_need_wakeup(rx->xsk_pool)) {
+		if (desc_available)
+			xsk_set_rx_need_wakeup(rx->xsk_pool);
+		else
+			xsk_clear_rx_need_wakeup(rx->xsk_pool);
+		return done;
+	}
+	return desc_available ? budget : done;
+}
static bool tsnep_rx_pending(struct tsnep_rx *rx)
@@ -1181,44 +1587,125 @@ static bool tsnep_rx_pending(struct tsnep_rx *rx)
	return false;
}
-static int tsnep_rx_open(struct tsnep_adapter *adapter, void __iomem *addr,
-			 int queue_index, struct tsnep_rx *rx)
+static int tsnep_rx_open(struct tsnep_rx *rx)
{
-	dma_addr_t dma;
+	int desc_available;
	int retval;
-	memset(rx, 0, sizeof(*rx));
-	rx->adapter = adapter;
-	rx->addr = addr;
-	rx->queue_index = queue_index;
-	retval = tsnep_rx_ring_init(rx);
+	retval = tsnep_rx_ring_create(rx);
	if (retval)
		return retval;
-	dma = rx->entry[0].desc_dma | TSNEP_RESET_OWNER_COUNTER;
-	iowrite32(DMA_ADDR_LOW(dma), rx->addr + TSNEP_RX_DESC_ADDR_LOW);
-	iowrite32(DMA_ADDR_HIGH(dma), rx->addr + TSNEP_RX_DESC_ADDR_HIGH);
-	rx->owner_counter = 1;
-	rx->increment_owner_counter = TSNEP_RING_SIZE - 1;
-	tsnep_rx_refill(rx, tsnep_rx_desc_available(rx), false);
+	tsnep_rx_init(rx);
+	desc_available = tsnep_rx_desc_available(rx);
+	if (rx->xsk_pool)
+		retval = tsnep_rx_alloc_zc(rx, desc_available, false);
+	else
+		retval = tsnep_rx_alloc(rx, desc_available, false);
+	if (retval != desc_available) {
+		retval = -ENOMEM;
+		goto alloc_failed;
+	}
+	/* prealloc pages to prevent allocation failures when XSK pool is
+	 * disabled at runtime
+	 */
+	if (rx->xsk_pool) {
+		retval = tsnep_rx_alloc_page_buffer(rx);
+		if (retval)
+			goto alloc_failed;
+	}
	return 0;
+alloc_failed:
+	tsnep_rx_ring_cleanup(rx);
+	return retval;
}
static void tsnep_rx_close(struct tsnep_rx *rx)
{
-	u32 val;
-	iowrite32(TSNEP_CONTROL_RX_DISABLE, rx->addr + TSNEP_CONTROL);
-	readx_poll_timeout(ioread32, rx->addr + TSNEP_CONTROL, val,
-			   ((val & TSNEP_CONTROL_RX_ENABLE) == 0), 10000,
-			   1000000);
+	if (rx->xsk_pool)
+		tsnep_rx_free_page_buffer(rx);
	tsnep_rx_ring_cleanup(rx);
}
+static void tsnep_rx_reopen(struct tsnep_rx *rx)
+{
+	struct page **page = rx->page_buffer;
+	int i;
+	tsnep_rx_init(rx);
+	for (i = 0; i < TSNEP_RING_SIZE; i++) {
+		struct tsnep_rx_entry *entry = &rx->entry[i];
+		/* defined initial values for properties are required for
+		 * correct owner counter checking
+		 */
+		entry->desc->properties = 0;
+		entry->desc_wb->properties = 0;
+		/* prevent allocation failures by reusing kept pages */
+		if (*page) {
+			tsnep_rx_set_page(rx, entry, *page);
+			tsnep_rx_activate(rx, rx->write);
+			rx->write++;
+			*page = NULL;
+			page++;
+		}
+	}
+}
+static void tsnep_rx_reopen_xsk(struct tsnep_rx *rx)
+{
+	struct page **page = rx->page_buffer;
+	u32 allocated;
+	int i;
+	tsnep_rx_init(rx);
+	/* alloc all ring entries except the last one, because ring cannot be
+	 * filled completely, as many buffers as possible is enough as wakeup is
+	 * done if new buffers are available
+	 */
+	allocated = xsk_buff_alloc_batch(rx->xsk_pool, rx->xdp_batch,
+					 TSNEP_RING_SIZE - 1);
+	for (i = 0; i < TSNEP_RING_SIZE; i++) {
+		struct tsnep_rx_entry *entry = &rx->entry[i];
+		/* keep pages to prevent allocation failures when xsk is
+		 * disabled
+		 */
+		if (entry->page) {
+			*page = entry->page;
+			entry->page = NULL;
+			page++;
+		}
+		/* defined initial values for properties are required for
+		 * correct owner counter checking
+		 */
+		entry->desc->properties = 0;
+		entry->desc_wb->properties = 0;
+		if (allocated) {
+			tsnep_rx_set_xdp(rx, entry,
+					 rx->xdp_batch[allocated - 1]);
+			tsnep_rx_activate(rx, rx->write);
+			rx->write++;
+			allocated--;
+		}
+	}
+}
static bool tsnep_pending(struct tsnep_queue *queue)
{
	if (queue->tx && tsnep_tx_pending(queue->tx))
@@ -1241,7 +1728,9 @@ static int tsnep_poll(struct napi_struct *napi, int budget)
		complete = tsnep_tx_poll(queue->tx, budget);
	if (queue->rx) {
-		done = tsnep_rx_poll(queue->rx, napi, budget);
+		done = queue->rx->xsk_pool ?
+		       tsnep_rx_poll_zc(queue->rx, napi, budget) :
+		       tsnep_rx_poll(queue->rx, napi, budget);
		if (done >= budget)
			complete = false;
	}
@@ -1322,8 +1811,12 @@ static void tsnep_queue_close(struct tsnep_queue *queue, bool first)
	tsnep_free_irq(queue, first);
-	if (rx && xdp_rxq_info_is_reg(&rx->xdp_rxq))
-		xdp_rxq_info_unreg(&rx->xdp_rxq);
+	if (rx) {
+		if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
+			xdp_rxq_info_unreg(&rx->xdp_rxq);
+		if (xdp_rxq_info_is_reg(&rx->xdp_rxq_zc))
+			xdp_rxq_info_unreg(&rx->xdp_rxq_zc);
+	}
	netif_napi_del(&queue->napi);
}
@@ -1335,8 +1828,6 @@ static int tsnep_queue_open(struct tsnep_adapter *adapter,
	struct tsnep_tx *tx = queue->tx;
	int retval;
-	queue->adapter = adapter;
	netif_napi_add(adapter->netdev, &queue->napi, tsnep_poll);
	if (rx) {
@@ -1348,6 +1839,10 @@ static int tsnep_queue_open(struct tsnep_adapter *adapter,
		else
			rx->tx_queue_index = 0;
+		/* prepare both memory models to eliminate possible registration
+		 * errors when memory model is switched between page pool and
+		 * XSK pool during runtime
+		 */
		retval = xdp_rxq_info_reg(&rx->xdp_rxq, adapter->netdev,
					  rx->queue_index, queue->napi.napi_id);
		if (retval)
@@ -1357,6 +1852,17 @@ static int tsnep_queue_open(struct tsnep_adapter *adapter,
						 rx->page_pool);
		if (retval)
			goto failed;
+		retval = xdp_rxq_info_reg(&rx->xdp_rxq_zc, adapter->netdev,
+					  rx->queue_index, queue->napi.napi_id);
+		if (retval)
+			goto failed;
+		retval = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq_zc,
+						    MEM_TYPE_XSK_BUFF_POOL,
+						    NULL);
+		if (retval)
+			goto failed;
+		if (rx->xsk_pool)
+			xsk_pool_set_rxq_info(rx->xsk_pool, &rx->xdp_rxq_zc);
	}
	retval = tsnep_request_irq(queue, first);
@@ -1374,30 +1880,48 @@ static int tsnep_queue_open(struct tsnep_adapter *adapter,
	return retval;
}
+static void tsnep_queue_enable(struct tsnep_queue *queue)
+{
+	napi_enable(&queue->napi);
+	tsnep_enable_irq(queue->adapter, queue->irq_mask);
+	if (queue->tx)
+		tsnep_tx_enable(queue->tx);
+	if (queue->rx)
+		tsnep_rx_enable(queue->rx);
+}
+static void tsnep_queue_disable(struct tsnep_queue *queue)
+{
+	if (queue->tx)
+		tsnep_tx_disable(queue->tx, &queue->napi);
+	napi_disable(&queue->napi);
+	tsnep_disable_irq(queue->adapter, queue->irq_mask);
+	/* disable RX after NAPI polling has been disabled, because RX can be
+	 * enabled during NAPI polling
+	 */
+	if (queue->rx)
+		tsnep_rx_disable(queue->rx);
+}
static int tsnep_netdev_open(struct net_device *netdev)
{
	struct tsnep_adapter *adapter = netdev_priv(netdev);
-	int tx_queue_index = 0;
-	int rx_queue_index = 0;
-	void __iomem *addr;
	int i, retval;
	for (i = 0; i < adapter->num_queues; i++) {
		if (adapter->queue[i].tx) {
-			addr = adapter->addr + TSNEP_QUEUE(tx_queue_index);
-			retval = tsnep_tx_open(adapter, addr, tx_queue_index,
-					       adapter->queue[i].tx);
+			retval = tsnep_tx_open(adapter->queue[i].tx);
			if (retval)
				goto failed;
-			tx_queue_index++;
		}
		if (adapter->queue[i].rx) {
-			addr = adapter->addr + TSNEP_QUEUE(rx_queue_index);
-			retval = tsnep_rx_open(adapter, addr, rx_queue_index,
-					       adapter->queue[i].rx);
+			retval = tsnep_rx_open(adapter->queue[i].rx);
			if (retval)
				goto failed;
-			rx_queue_index++;
		}
		retval = tsnep_queue_open(adapter, &adapter->queue[i], i == 0);
@@ -1419,11 +1943,8 @@ static int tsnep_netdev_open(struct net_device *netdev)
	if (retval)
		goto phy_failed;
-	for (i = 0; i < adapter->num_queues; i++) {
-		napi_enable(&adapter->queue[i].napi);
-		tsnep_enable_irq(adapter, adapter->queue[i].irq_mask);
-	}
+	for (i = 0; i < adapter->num_queues; i++)
+		tsnep_queue_enable(&adapter->queue[i]);
	return 0;
@@ -1450,9 +1971,7 @@ static int tsnep_netdev_close(struct net_device *netdev)
	tsnep_phy_close(adapter);
	for (i = 0; i < adapter->num_queues; i++) {
-		tsnep_disable_irq(adapter, adapter->queue[i].irq_mask);
-		napi_disable(&adapter->queue[i].napi);
+		tsnep_queue_disable(&adapter->queue[i]);
		tsnep_queue_close(&adapter->queue[i], i == 0);
@@ -1465,6 +1984,69 @@ static int tsnep_netdev_close(struct net_device *netdev)
	return 0;
}
+int tsnep_enable_xsk(struct tsnep_queue *queue, struct xsk_buff_pool *pool)
+{
+	bool running = netif_running(queue->adapter->netdev);
+	u32 frame_size;
+	frame_size = xsk_pool_get_rx_frame_size(pool);
+	if (frame_size < TSNEP_XSK_RX_BUF_SIZE)
+		return -EOPNOTSUPP;
+	queue->rx->page_buffer = kcalloc(TSNEP_RING_SIZE,
+					 sizeof(*queue->rx->page_buffer),
+					 GFP_KERNEL);
+	if (!queue->rx->page_buffer)
+		return -ENOMEM;
+	queue->rx->xdp_batch = kcalloc(TSNEP_RING_SIZE,
+				       sizeof(*queue->rx->xdp_batch),
+				       GFP_KERNEL);
+	if (!queue->rx->xdp_batch) {
+		kfree(queue->rx->page_buffer);
+		queue->rx->page_buffer = NULL;
+		return -ENOMEM;
+	}
+	xsk_pool_set_rxq_info(pool, &queue->rx->xdp_rxq_zc);
+	if (running)
+		tsnep_queue_disable(queue);
+	queue->tx->xsk_pool = pool;
+	queue->rx->xsk_pool = pool;
+	if (running) {
+		tsnep_rx_reopen_xsk(queue->rx);
+		tsnep_queue_enable(queue);
+	}
+	return 0;
+}
+void tsnep_disable_xsk(struct tsnep_queue *queue)
+{
+	bool running = netif_running(queue->adapter->netdev);
+	if (running)
+		tsnep_queue_disable(queue);
+	tsnep_rx_free_zc(queue->rx);
+	queue->rx->xsk_pool = NULL;
+	queue->tx->xsk_pool = NULL;
+	if (running) {
+		tsnep_rx_reopen(queue->rx);
+		tsnep_queue_enable(queue);
+	}
+	kfree(queue->rx->xdp_batch);
+	queue->rx->xdp_batch = NULL;
+	kfree(queue->rx->page_buffer);
+	queue->rx->page_buffer = NULL;
+}
static netdev_tx_t tsnep_netdev_xmit_frame(struct sk_buff *skb,
					   struct net_device *netdev)
{
@@ -1614,6 +2196,9 @@ static int tsnep_netdev_bpf(struct net_device *dev, struct netdev_bpf *bpf)
	switch (bpf->command) {
	case XDP_SETUP_PROG:
		return tsnep_xdp_setup_prog(adapter, bpf->prog, bpf->extack);
+	case XDP_SETUP_XSK_POOL:
+		return tsnep_xdp_setup_pool(adapter, bpf->xsk.pool,
+					    bpf->xsk.queue_id);
	default:
		return -EOPNOTSUPP;
	}
@@ -1668,6 +2253,24 @@ static int tsnep_netdev_xdp_xmit(struct net_device *dev, int n,
	return nxmit;
}
+static int tsnep_netdev_xsk_wakeup(struct net_device *dev, u32 queue_id,
+				   u32 flags)
+{
+	struct tsnep_adapter *adapter = netdev_priv(dev);
+	struct tsnep_queue *queue;
+	if (queue_id >= adapter->num_rx_queues ||
+	    queue_id >= adapter->num_tx_queues)
+		return -EINVAL;
+	queue = &adapter->queue[queue_id];
+	if (!napi_if_scheduled_mark_missed(&queue->napi))
+		napi_schedule(&queue->napi);
+	return 0;
+}
static const struct net_device_ops tsnep_netdev_ops = {
	.ndo_open = tsnep_netdev_open,
	.ndo_stop = tsnep_netdev_close,
@@ -1681,6 +2284,7 @@ static const struct net_device_ops tsnep_netdev_ops = {
	.ndo_setup_tc = tsnep_tc_setup,
	.ndo_bpf = tsnep_netdev_bpf,
	.ndo_xdp_xmit = tsnep_netdev_xdp_xmit,
+	.ndo_xsk_wakeup = tsnep_netdev_xsk_wakeup,
};
static int tsnep_mac_init(struct tsnep_adapter *adapter)
@@ -1796,9 +2400,16 @@ static int tsnep_queue_init(struct tsnep_adapter *adapter, int queue_count)
	adapter->num_tx_queues = 1;
	adapter->num_rx_queues = 1;
	adapter->num_queues = 1;
+	adapter->queue[0].adapter = adapter;
	adapter->queue[0].irq = retval;
	adapter->queue[0].tx = &adapter->tx[0];
+	adapter->queue[0].tx->adapter = adapter;
+	adapter->queue[0].tx->addr = adapter->addr + TSNEP_QUEUE(0);
+	adapter->queue[0].tx->queue_index = 0;
	adapter->queue[0].rx = &adapter->rx[0];
+	adapter->queue[0].rx->adapter = adapter;
+	adapter->queue[0].rx->addr = adapter->addr + TSNEP_QUEUE(0);
+	adapter->queue[0].rx->queue_index = 0;
	adapter->queue[0].irq_mask = irq_mask;
	adapter->queue[0].irq_delay_addr = adapter->addr + ECM_INT_DELAY;
	retval = tsnep_set_irq_coalesce(&adapter->queue[0],
@@ -1820,9 +2431,16 @@ static int tsnep_queue_init(struct tsnep_adapter *adapter, int queue_count)
		adapter->num_tx_queues++;
		adapter->num_rx_queues++;
		adapter->num_queues++;
+		adapter->queue[i].adapter = adapter;
		adapter->queue[i].irq = retval;
		adapter->queue[i].tx = &adapter->tx[i];
+		adapter->queue[i].tx->adapter = adapter;
+		adapter->queue[i].tx->addr = adapter->addr + TSNEP_QUEUE(i);
+		adapter->queue[i].tx->queue_index = i;
		adapter->queue[i].rx = &adapter->rx[i];
+		adapter->queue[i].rx->adapter = adapter;
+		adapter->queue[i].rx->addr = adapter->addr + TSNEP_QUEUE(i);
+		adapter->queue[i].rx->queue_index = i;
		adapter->queue[i].irq_mask =
			irq_mask << (ECM_INT_TXRX_SHIFT * i);
		adapter->queue[i].irq_delay_addr =
@@ -1927,7 +2545,8 @@ static int tsnep_probe(struct platform_device *pdev)
	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
			       NETDEV_XDP_ACT_NDO_XMIT |
-			       NETDEV_XDP_ACT_NDO_XMIT_SG;
+			       NETDEV_XDP_ACT_NDO_XMIT_SG |
+			       NETDEV_XDP_ACT_XSK_ZEROCOPY;
	/* carrier off reporting is important to ethtool even BEFORE open */
	netif_carrier_off(netdev);
@@ -17,3 +17,69 @@ int tsnep_xdp_setup_prog(struct tsnep_adapter *adapter, struct bpf_prog *prog,
	return 0;
}
+static int tsnep_xdp_enable_pool(struct tsnep_adapter *adapter,
+				 struct xsk_buff_pool *pool, u16 queue_id)
+{
+	struct tsnep_queue *queue;
+	int retval;
+	if (queue_id >= adapter->num_rx_queues ||
+	    queue_id >= adapter->num_tx_queues)
+		return -EINVAL;
+	queue = &adapter->queue[queue_id];
+	if (queue->rx->queue_index != queue_id ||
+	    queue->tx->queue_index != queue_id) {
+		netdev_err(adapter->netdev,
+			   "XSK support only for TX/RX queue pairs\n");
+		return -EOPNOTSUPP;
+	}
+	retval = xsk_pool_dma_map(pool, adapter->dmadev,
+				  DMA_ATTR_SKIP_CPU_SYNC);
+	if (retval) {
+		netdev_err(adapter->netdev, "failed to map XSK pool\n");
+		return retval;
+	}
+	retval = tsnep_enable_xsk(queue, pool);
+	if (retval) {
+		xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC);
+		return retval;
+	}
+	return 0;
+}
+static int tsnep_xdp_disable_pool(struct tsnep_adapter *adapter, u16 queue_id)
+{
+	struct xsk_buff_pool *pool;
+	struct tsnep_queue *queue;
+	if (queue_id >= adapter->num_rx_queues ||
+	    queue_id >= adapter->num_tx_queues)
+		return -EINVAL;
+	pool = xsk_get_pool_from_qid(adapter->netdev, queue_id);
+	if (!pool)
+		return -EINVAL;
+	queue = &adapter->queue[queue_id];
+	tsnep_disable_xsk(queue);
+	xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC);
+	return 0;
+}
+int tsnep_xdp_setup_pool(struct tsnep_adapter *adapter,
+			 struct xsk_buff_pool *pool, u16 queue_id)
+{
+	return pool ? tsnep_xdp_enable_pool(adapter, pool, queue_id) :
+		      tsnep_xdp_disable_pool(adapter, queue_id);
+}