idpf: use libeth Rx buffer management for payload buffer

idpf uses Page Pool for data buffers with hardcoded buffer lengths of 4k for "classic" buffers and 2k for "short" ones. This is not flexible and does not ensure optimal memory usage. Why would you need 4k buffers when the MTU is 1500? Use libeth for the data buffers and don't hardcode any buffer sizes. Let them be calculated from the MTU for "classics" and then divide the truesize by 2 for "short" ones. The memory usage is now greatly reduced and 2 buffer queues start to make sense: on frames <= 1024, you'll recycle (and resync) a page only after 4 HW writes rather than two. Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>

idpf: use libeth Rx buffer management for payload buffer
idpf uses Page Pool for data buffers with hardcoded buffer lengths of 4k for "classic" buffers and 2k for "short" ones. This is not flexible and does not ensure optimal memory usage. Why would you need 4k buffers when the MTU is 1500? Use libeth for the data buffers and don't hardcode any buffer sizes. Let them be calculated from the MTU for "classics" and then divide the truesize by 2 for "short" ones. The memory usage is now greatly reduced and 2 buffer queues start to make sense: on frames <= 1024, you'll recycle (and resync) a page only after 4 HW writes rather than two. Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com> Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
74d1412a · Alexander Lobakin · Tony Nguyen · 90912f9f · 74d1412a · 74d1412a
Commit 74d1412a authored Jun 20, 2024 by Alexander Lobakin Committed by Tony Nguyen Jul 10, 2024
6 changed files
--- a/drivers/net/ethernet/intel/idpf/Kconfig
+++ b/drivers/net/ethernet/intel/idpf/Kconfig
@@ -6,7 +6,6 @@ config IDPF
 	depends on PCI_MSI
 	select DIMLIB
 	select LIBETH
-	select PAGE_POOL
 	help
 	  This driver supports Intel(R) Infrastructure Data Path Function
 	  devices.

--- a/drivers/net/ethernet/intel/idpf/idpf.h
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -264,7 +264,6 @@ struct idpf_port_stats {
 *		    the worst case.
 * @num_bufqs_per_qgrp: Buffer queues per RX queue in a given grouping
 * @bufq_desc_count: Buffer queue descriptor count
- * @bufq_size: Size of buffers in ring (e.g. 2K, 4K, etc)
 * @num_rxq_grp: Number of RX queues in a group
 * @rxq_grps: Total number of RX groups. Number of groups * number of RX per
 *	      group will yield total number of RX queues.
@@ -308,7 +307,6 @@ struct idpf_vport {
 	u32 rxq_desc_count;
 	u8 num_bufqs_per_qgrp;
 	u32 bufq_desc_count[IDPF_MAX_BUFQS_PER_RXQ_GRP];
-	u32 bufq_size[IDPF_MAX_BUFQS_PER_RXQ_GRP];
 	u16 num_rxq_grp;
 	struct idpf_rxq_group *rxq_grps;
 	u32 rxq_model;

--- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
@@ -857,20 +857,24 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q,
 				      u16 cleaned_count)
 {
 	struct virtchnl2_singleq_rx_buf_desc *desc;
+	const struct libeth_fq_fp fq = {
+		.pp		= rx_q->pp,
+		.fqes		= rx_q->rx_buf,
+		.truesize	= rx_q->truesize,
+		.count		= rx_q->desc_count,
+	};
 	u16 nta = rx_q->next_to_alloc;
-	struct idpf_rx_buf *buf;

 	if (!cleaned_count)
 		return false;

 	desc = &rx_q->single_buf[nta];
-	buf = &rx_q->rx_buf[nta];

 	do {
 		dma_addr_t addr;

-		addr = idpf_alloc_page(rx_q->pp, buf, rx_q->rx_buf_size);
-		if (unlikely(addr == DMA_MAPPING_ERROR))
+		addr = libeth_rx_alloc(&fq, nta);
+		if (addr == DMA_MAPPING_ERROR)
 			break;

 		/* Refresh the desc even if buffer_addrs didn't change
@@ -880,11 +884,9 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rx_q,
 		desc->hdr_addr = 0;
 		desc++;

-		buf++;
 		nta++;
 		if (unlikely(nta == rx_q->desc_count)) {
 			desc = &rx_q->single_buf[0];
-			buf = rx_q->rx_buf;
 			nta = 0;
 		}

@@ -1004,28 +1006,26 @@ static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)
 		idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields);

 		rx_buf = &rx_q->rx_buf[ntc];
-		if (!fields.size) {
-			idpf_rx_put_page(rx_buf);
+		if (!libeth_rx_sync_for_cpu(rx_buf, fields.size))
 			goto skip_data;
-		}

-		idpf_rx_sync_for_cpu(rx_buf, fields.size);
 		if (skb)
 			idpf_rx_add_frag(rx_buf, skb, fields.size);
 		else
-			skb = idpf_rx_construct_skb(rx_q, rx_buf, fields.size);
+			skb = idpf_rx_build_skb(rx_buf, fields.size);

 		/* exit if we failed to retrieve a buffer */
 		if (!skb)
 			break;

 skip_data:
-		IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);
+		rx_buf->page = NULL;

+		IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc);
 		cleaned_count++;

 		/* skip if it is non EOP desc */
-		if (idpf_rx_singleq_is_non_eop(rx_desc))
+		if (idpf_rx_singleq_is_non_eop(rx_desc) || unlikely(!skb))
 			continue;

 #define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \
@@ -1062,6 +1062,7 @@ static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget)

 	rx_q->next_to_clean = ntc;

+	page_pool_nid_changed(rx_q->pp, numa_mem_id());
 	if (cleaned_count)
 		failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count);


--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c
--- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h
+++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h
@@ -7,7 +7,6 @@
 #include <linux/dim.h>

 #include <net/libeth/cache.h>
-#include <net/libeth/rx.h>
 #include <net/tcp.h>
 #include <net/netdev_queues.h>

@@ -97,14 +96,10 @@ do {								\
 		idx = 0;					\
 } while (0)

-#define IDPF_RX_HDR_SIZE			256
-#define IDPF_RX_BUF_2048			2048
-#define IDPF_RX_BUF_4096			4096
 #define IDPF_RX_BUF_STRIDE			32
 #define IDPF_RX_BUF_POST_STRIDE			16
 #define IDPF_LOW_WATERMARK			64
-#define IDPF_PACKET_HDR_PAD	\
-	(ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN * 2)
+
 #define IDPF_TX_TSO_MIN_MSS			88

 /* Minimum number of descriptors between 2 descriptors with the RE bit set;
@@ -538,7 +533,7 @@ struct idpf_txq_stash {
 * @desc_ring: virtual descriptor ring address
 * @bufq_sets: Pointer to the array of buffer queues in splitq mode
 * @napi: NAPI instance corresponding to this queue (splitq)
- * @rx_buf: See struct idpf_rx_buf
+ * @rx_buf: See struct &libeth_fqe
 * @pp: Page pool pointer in singleq mode
 * @netdev: &net_device corresponding to this queue
 * @tail: Tail offset. Used for both queue models single and split.
@@ -552,6 +547,7 @@ struct idpf_txq_stash {
 * @next_to_clean: Next descriptor to clean
 * @next_to_alloc: RX buffer to allocate at
 * @skb: Pointer to the skb
+ * @truesize: data buffer truesize in singleq
 * @stats_sync: See struct u64_stats_sync
 * @q_stats: See union idpf_rx_queue_stats
 * @q_id: Queue id
@@ -577,7 +573,7 @@ struct idpf_rx_queue {
 			struct napi_struct *napi;
 		};
 		struct {
-			struct idpf_rx_buf *rx_buf;
+			struct libeth_fqe *rx_buf;
 			struct page_pool *pp;
 		};
 	};
@@ -598,6 +594,7 @@ struct idpf_rx_queue {
 	u16 next_to_alloc;

 	struct sk_buff *skb;
+	u32 truesize;

 	struct u64_stats_sync stats_sync;
 	struct idpf_rx_queue_stats q_stats;
@@ -617,7 +614,7 @@ struct idpf_rx_queue {
 	__cacheline_group_end_aligned(cold);
 };
 libeth_cacheline_set_assert(struct idpf_rx_queue, 64,
-			    72 + sizeof(struct u64_stats_sync),
+			    80 + sizeof(struct u64_stats_sync),
 			    32);

 /**
@@ -744,15 +741,16 @@ libeth_cacheline_set_assert(struct idpf_tx_queue, 64,
 * @split_buf: buffer descriptor array
 * @hdr_buf: &libeth_fqe for header buffers
 * @hdr_pp: &page_pool for header buffers
- * @buf: &idpf_rx_buf for data buffers
+ * @buf: &libeth_fqe for data buffers
 * @pp: &page_pool for data buffers
 * @tail: Tail offset
 * @flags: See enum idpf_queue_flags_t
 * @desc_count: Number of descriptors
- * @hdr_truesize: truesize for buffer headers
 * @next_to_use: Next descriptor to use
 * @next_to_clean: Next descriptor to clean
 * @next_to_alloc: RX buffer to allocate at
+ * @hdr_truesize: truesize for buffer headers
+ * @truesize: truesize for data buffers
 * @q_id: Queue id
 * @size: Length of descriptor ring in bytes
 * @dma: Physical address of ring
@@ -766,20 +764,21 @@ struct idpf_buf_queue {
 	struct virtchnl2_splitq_rx_buf_desc *split_buf;
 	struct libeth_fqe *hdr_buf;
 	struct page_pool *hdr_pp;
-	struct idpf_rx_buf *buf;
+	struct libeth_fqe *buf;
 	struct page_pool *pp;
 	void __iomem *tail;

 	DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS);
 	u32 desc_count;
-
-	u32 hdr_truesize;
 	__cacheline_group_end_aligned(read_mostly);

 	__cacheline_group_begin_aligned(read_write);
 	u32 next_to_use;
 	u32 next_to_clean;
 	u32 next_to_alloc;
+
+	u32 hdr_truesize;
+	u32 truesize;
 	__cacheline_group_end_aligned(read_write);

 	__cacheline_group_begin_aligned(cold);
@@ -794,7 +793,7 @@ struct idpf_buf_queue {
 	u16 rx_buf_size;
 	__cacheline_group_end_aligned(cold);
 };
-libeth_cacheline_set_assert(struct idpf_buf_queue, 64, 16, 32);
+libeth_cacheline_set_assert(struct idpf_buf_queue, 64, 24, 32);

 /**
 * struct idpf_compl_queue - software structure representing a completion queue
@@ -1034,60 +1033,6 @@ static inline void idpf_tx_splitq_build_desc(union idpf_tx_flex_desc *desc,
 		idpf_tx_splitq_build_flow_desc(desc, params, td_cmd, size);
 }

-/**
- * idpf_alloc_page - Allocate a new RX buffer from the page pool
- * @pool: page_pool to allocate from
- * @buf: metadata struct to populate with page info
- * @buf_size: 2K or 4K
- *
- * Returns &dma_addr_t to be passed to HW for Rx, %DMA_MAPPING_ERROR otherwise.
- */
-static inline dma_addr_t idpf_alloc_page(struct page_pool *pool,
-					 struct idpf_rx_buf *buf,
-					 unsigned int buf_size)
-{
-	if (buf_size == IDPF_RX_BUF_2048)
-		buf->page = page_pool_dev_alloc_frag(pool, &buf->offset,
-						     buf_size);
-	else
-		buf->page = page_pool_dev_alloc_pages(pool);
-
-	if (!buf->page)
-		return DMA_MAPPING_ERROR;
-
-	buf->truesize = buf_size;
-
-	return page_pool_get_dma_addr(buf->page) + buf->offset +
-	       pool->p.offset;
-}
-
-/**
- * idpf_rx_put_page - Return RX buffer page to pool
- * @rx_buf: RX buffer metadata struct
- */
-static inline void idpf_rx_put_page(struct idpf_rx_buf *rx_buf)
-{
-	page_pool_put_page(rx_buf->page->pp, rx_buf->page,
-			   rx_buf->truesize, true);
-	rx_buf->page = NULL;
-}
-
-/**
- * idpf_rx_sync_for_cpu - Synchronize DMA buffer
- * @rx_buf: RX buffer metadata struct
- * @len: frame length from descriptor
- */
-static inline void idpf_rx_sync_for_cpu(struct idpf_rx_buf *rx_buf, u32 len)
-{
-	struct page *page = rx_buf->page;
-	struct page_pool *pp = page->pp;
-
-	dma_sync_single_range_for_cpu(pp->p.dev,
-				      page_pool_get_dma_addr(page),
-				      rx_buf->offset + pp->p.offset, len,
-				      page_pool_get_dma_dir(pp));
-}
-
 int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget);
 void idpf_vport_init_num_qs(struct idpf_vport *vport,
 			    struct virtchnl2_create_vport *vport_msg);
@@ -1110,9 +1055,6 @@ void idpf_deinit_rss(struct idpf_vport *vport);
 int idpf_rx_bufs_init_all(struct idpf_vport *vport);
 void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
 		      unsigned int size);
-struct sk_buff *idpf_rx_construct_skb(const struct idpf_rx_queue *rxq,
-				      struct idpf_rx_buf *rx_buf,
-				      unsigned int size);
 struct sk_buff *idpf_rx_build_skb(const struct libeth_fqe *buf, u32 size);
 void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val,
 			   bool xmit_more);

--- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
@@ -1615,6 +1615,12 @@ static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport)
 			rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq;
 			sets = rxq->bufq_sets;

+			/* In splitq mode, RXQ buffer size should be
+			 * set to that of the first buffer queue
+			 * associated with this RXQ.
+			 */
+			rxq->rx_buf_size = sets[0].bufq.rx_buf_size;
+
 			qi[k].rx_bufq1_id = cpu_to_le16(sets[0].bufq.q_id);
 			if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) {
 				qi[k].bufq2_ena = IDPF_BUFQ2_ENA;
@@ -3167,7 +3173,7 @@ void idpf_vport_init(struct idpf_vport *vport, struct idpf_vport_max_q *max_q)
 	rss_data->rss_lut_size = le16_to_cpu(vport_msg->rss_lut_size);

 	ether_addr_copy(vport->default_mac_addr, vport_msg->default_mac_addr);
-	vport->max_mtu = le16_to_cpu(vport_msg->max_mtu) - IDPF_PACKET_HDR_PAD;
+	vport->max_mtu = le16_to_cpu(vport_msg->max_mtu) - LIBETH_RX_LL_LEN;

 	/* Initialize Tx and Rx profiles for Dynamic Interrupt Moderation */
 	memcpy(vport->rx_itr_profile, rx_itr, IDPF_DIM_PROFILE_SLOTS);