Commit 69cf8730 authored by Jakub Kicinski

Merge branch '200GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue

Tony Nguyen says:

====================
idpf: XDP chapter I: convert Rx to libeth

Alexander Lobakin says:

XDP for idpf is currently 5 chapters:
* convert Rx to libeth (this);
* convert Tx and stats to libeth;
* generic XDP and XSk code changes, libeth_xdp;
* actual XDP for idpf via libeth_xdp;
* XSk for idpf (^).

Part I does the following:
* splits &idpf_queue into 4 (RQ, SQ, FQ, CQ) and puts them on a diet;
* ensures optimal cacheline placement, strictly asserts CL sizes;
* moves currently unused/dead singleq mode out of line;
* reuses libeth's Rx ptype definitions and helpers;
* uses libeth's Rx buffer management for both header and payload;
* eliminates memcpy()s and coherent DMA uses on hotpath, uses
  napi_build_skb() instead of in-place short skb allocation (see the
  sketch after the commit metadata below).

Most idpf patches, except for the queue split, remove more lines
than they add.

Expect far better memory utilization and +5-8% on Rx depending on
the case (+17% on skb XDP_DROP :>).

* '200GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue:
  idpf: use libeth Rx buffer management for payload buffer
  idpf: convert header split mode to libeth + napi_build_skb()
  libeth: support different types of buffers for Rx
  idpf: remove legacy Page Pool Ethtool stats
  idpf: reuse libeth's definitions of parsed ptype structures
  idpf: compile singleq code only under default-n CONFIG_IDPF_SINGLEQ
  idpf: merge singleq and splitq &net_device_ops
  idpf: strictly assert cachelines of queue and queue vector structures
  idpf: avoid bloating &idpf_q_vector with big %NR_CPUS
  idpf: split &idpf_queue into 4 strictly-typed queue structures
  idpf: stop using macros for accessing queue descriptors
  libeth: add cacheline / struct layout assertion helpers
  page_pool: use __cacheline_group_{begin, end}_aligned()
  cache: add __cacheline_group_{begin, end}_aligned() (+ couple more)
====================

Link: https://patch.msgid.link/20240710203031.188081-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 26f45317 74d1412a
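
A note on the cover letter's last "Part I" bullet: napi_build_skb() builds
the skb head directly around an already DMA-mapped, page_pool-backed Rx
buffer, so the driver no longer allocates an skb and memcpy()s the packet
(or its headers) into it on the hotpath. A minimal sketch of that pattern;
the helper and its parameters are illustrative, not idpf's actual code:

static struct sk_buff *rx_buf_to_skb(void *va, u32 truesize, u32 headroom,
                                     u32 len)
{
        struct sk_buff *skb;

        /* Reuse the Rx buffer as the skb head -- no copy on the hotpath */
        skb = napi_build_skb(va, truesize);
        if (unlikely(!skb))
                return NULL;

        /* Let the stack return the page to its page_pool on free */
        skb_mark_for_recycle(skb);

        skb_reserve(skb, headroom);
        __skb_put(skb, len);

        return skb;
}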
@@ -384,17 +384,6 @@ config IGC_LEDS
           Optional support for controlling the NIC LED's with the netdev
           LED trigger.
 
-config IDPF
-        tristate "Intel(R) Infrastructure Data Path Function Support"
-        depends on PCI_MSI
-        select DIMLIB
-        select PAGE_POOL
-        select PAGE_POOL_STATS
-        help
-          This driver supports Intel(R) Infrastructure Data Path Function
-          devices.
-
-          To compile this driver as a module, choose M here. The module
-          will be called idpf.
+source "drivers/net/ethernet/intel/idpf/Kconfig"
 
 endif # NET_VENDOR_INTEL

+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2024 Intel Corporation
+
+config IDPF
+        tristate "Intel(R) Infrastructure Data Path Function Support"
+        depends on PCI_MSI
+        select DIMLIB
+        select LIBETH
+        help
+          This driver supports Intel(R) Infrastructure Data Path Function
+          devices.
+
+          To compile this driver as a module, choose M here. The module
+          will be called idpf.
+
+if IDPF
+
+config IDPF_SINGLEQ
+        bool "idpf singleq support"
+        help
+          This option enables support for legacy single Rx/Tx queues w/no
+          completion and fill queues. Only enable if you have hardware which
+          wants to work in this mode as it increases the driver size and adds
+          runtime checks on hotpath.
+
+endif # IDPF

@@ -12,7 +12,8 @@ idpf-y := \
         idpf_ethtool.o \
         idpf_lib.o \
         idpf_main.o \
-        idpf_singleq_txrx.o \
         idpf_txrx.o \
         idpf_virtchnl.o \
         idpf_vf_dev.o
+
+idpf-$(CONFIG_IDPF_SINGLEQ) += idpf_singleq_txrx.o

@@ -17,10 +17,8 @@ struct idpf_vport_max_q;
 #include <linux/sctp.h>
 #include <linux/ethtool_netlink.h>
 #include <net/gro.h>
-#include <linux/dim.h>
 
 #include "virtchnl2.h"
-#include "idpf_lan_txrx.h"
 #include "idpf_txrx.h"
 #include "idpf_controlq.h"
@@ -266,7 +264,6 @@ struct idpf_port_stats {
  * the worst case.
  * @num_bufqs_per_qgrp: Buffer queues per RX queue in a given grouping
  * @bufq_desc_count: Buffer queue descriptor count
- * @bufq_size: Size of buffers in ring (e.g. 2K, 4K, etc)
  * @num_rxq_grp: Number of RX queues in a group
  * @rxq_grps: Total number of RX groups. Number of groups * number of RX per
  *            group will yield total number of RX queues.
@@ -302,7 +299,7 @@ struct idpf_vport {
         u16 num_txq_grp;
         struct idpf_txq_group *txq_grps;
         u32 txq_model;
-        struct idpf_queue **txqs;
+        struct idpf_tx_queue **txqs;
         bool crc_enable;
         u16 num_rxq;
@@ -310,11 +307,10 @@ struct idpf_vport {
         u32 rxq_desc_count;
         u8 num_bufqs_per_qgrp;
         u32 bufq_desc_count[IDPF_MAX_BUFQS_PER_RXQ_GRP];
-        u32 bufq_size[IDPF_MAX_BUFQS_PER_RXQ_GRP];
         u16 num_rxq_grp;
         struct idpf_rxq_group *rxq_grps;
         u32 rxq_model;
-        struct idpf_rx_ptype_decoded rx_ptype_lkup[IDPF_RX_MAX_PTYPE];
+        struct libeth_rx_pt *rx_ptype_lkup;
 
         struct idpf_adapter *adapter;
         struct net_device *netdev;
@@ -601,7 +597,8 @@ struct idpf_adapter {
  */
 static inline int idpf_is_queue_model_split(u16 q_model)
 {
-        return q_model == VIRTCHNL2_QUEUE_MODEL_SPLIT;
+        return !IS_ENABLED(CONFIG_IDPF_SINGLEQ) ||
+               q_model == VIRTCHNL2_QUEUE_MODEL_SPLIT;
 }
 
 #define idpf_is_cap_ena(adapter, field, flag) \
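
The idpf_is_queue_model_split() hunk above is what makes the new default-n
CONFIG_IDPF_SINGLEQ symbol (and the now conditional idpf_singleq_txrx.o
object) pay off: with the option disabled, IS_ENABLED() turns the helper
into a compile-time constant, so every singleq branch guarded by it becomes
dead code and the unbuilt singleq functions are never referenced. A hedged
sketch of the pattern, with made-up names:

struct foo_priv {
        bool legacy_mode;
};

void foo_legacy_rx(struct foo_priv *priv);      /* only built when FOO_LEGACY=y */
void foo_splitq_rx(struct foo_priv *priv);

static inline bool foo_is_legacy(const struct foo_priv *priv)
{
        /* Constant-folds to false when CONFIG_FOO_LEGACY=n */
        return IS_ENABLED(CONFIG_FOO_LEGACY) && priv->legacy_mode;
}

static void foo_handle_rx(struct foo_priv *priv)
{
        if (foo_is_legacy(priv))
                foo_legacy_rx(priv);    /* call eliminated when =n */
        else
                foo_splitq_rx(priv);
}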

@@ -4,6 +4,8 @@
 #ifndef _IDPF_LAN_TXRX_H_
 #define _IDPF_LAN_TXRX_H_
 
+#include <linux/bits.h>
+
 enum idpf_rss_hash {
         IDPF_HASH_INVALID = 0,
         /* Values 1 - 28 are reserved for future use */

@@ -4,8 +4,7 @@
 #include "idpf.h"
 #include "idpf_virtchnl.h"
 
-static const struct net_device_ops idpf_netdev_ops_splitq;
-static const struct net_device_ops idpf_netdev_ops_singleq;
+static const struct net_device_ops idpf_netdev_ops;
 
 /**
  * idpf_init_vector_stack - Fill the MSIX vector stack with vector index
@@ -69,7 +68,7 @@ static void idpf_deinit_vector_stack(struct idpf_adapter *adapter)
 static void idpf_mb_intr_rel_irq(struct idpf_adapter *adapter)
 {
         clear_bit(IDPF_MB_INTR_MODE, adapter->flags);
-        free_irq(adapter->msix_entries[0].vector, adapter);
+        kfree(free_irq(adapter->msix_entries[0].vector, adapter));
 
         queue_delayed_work(adapter->mbx_wq, &adapter->mbx_task, 0);
 }
@@ -124,15 +123,14 @@ static void idpf_mb_irq_enable(struct idpf_adapter *adapter)
  */
 static int idpf_mb_intr_req_irq(struct idpf_adapter *adapter)
 {
-        struct idpf_q_vector *mb_vector = &adapter->mb_vector;
         int irq_num, mb_vidx = 0, err;
+        char *name;
 
         irq_num = adapter->msix_entries[mb_vidx].vector;
-        mb_vector->name = kasprintf(GFP_KERNEL, "%s-%s-%d",
-                                    dev_driver_string(&adapter->pdev->dev),
-                                    "Mailbox", mb_vidx);
-        err = request_irq(irq_num, adapter->irq_mb_handler, 0,
-                          mb_vector->name, adapter);
+        name = kasprintf(GFP_KERNEL, "%s-%s-%d",
+                         dev_driver_string(&adapter->pdev->dev),
+                         "Mailbox", mb_vidx);
+        err = request_irq(irq_num, adapter->irq_mb_handler, 0, name, adapter);
         if (err) {
                 dev_err(&adapter->pdev->dev,
                         "IRQ request for mailbox failed, error: %d\n", err);
@@ -765,10 +763,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport)
         }
 
         /* assign netdev_ops */
-        if (idpf_is_queue_model_split(vport->txq_model))
-                netdev->netdev_ops = &idpf_netdev_ops_splitq;
-        else
-                netdev->netdev_ops = &idpf_netdev_ops_singleq;
+        netdev->netdev_ops = &idpf_netdev_ops;
 
         /* setup watchdog timeout value to be 5 second */
         netdev->watchdog_timeo = 5 * HZ;
@@ -946,6 +941,9 @@ static void idpf_decfg_netdev(struct idpf_vport *vport)
 {
         struct idpf_adapter *adapter = vport->adapter;
 
+        kfree(vport->rx_ptype_lkup);
+        vport->rx_ptype_lkup = NULL;
+
         unregister_netdev(vport->netdev);
         free_netdev(vport->netdev);
         vport->netdev = NULL;
@@ -1318,14 +1316,14 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport)
         if (idpf_is_queue_model_split(vport->rxq_model)) {
                 for (j = 0; j < vport->num_bufqs_per_qgrp; j++) {
-                        struct idpf_queue *q =
+                        const struct idpf_buf_queue *q =
                                 &grp->splitq.bufq_sets[j].bufq;
 
                         writel(q->next_to_alloc, q->tail);
                 }
         } else {
                 for (j = 0; j < grp->singleq.num_rxq; j++) {
-                        struct idpf_queue *q =
+                        const struct idpf_rx_queue *q =
                                 grp->singleq.rxqs[j];
 
                         writel(q->next_to_alloc, q->tail);
@@ -1855,7 +1853,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
         enum idpf_vport_state current_state = np->state;
         struct idpf_adapter *adapter = vport->adapter;
         struct idpf_vport *new_vport;
-        int err, i;
+        int err;
 
         /* If the system is low on memory, we can end up in bad state if we
          * free all the memory for queue resources and try to allocate them
@@ -1929,46 +1927,6 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
          */
         memcpy(vport, new_vport, offsetof(struct idpf_vport, link_speed_mbps));
 
-        /* Since idpf_vport_queues_alloc was called with new_port, the queue
-         * back pointers are currently pointing to the local new_vport. Reset
-         * the backpointers to the original vport here
-         */
-        for (i = 0; i < vport->num_txq_grp; i++) {
-                struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i];
-                int j;
-
-                tx_qgrp->vport = vport;
-                for (j = 0; j < tx_qgrp->num_txq; j++)
-                        tx_qgrp->txqs[j]->vport = vport;
-
-                if (idpf_is_queue_model_split(vport->txq_model))
-                        tx_qgrp->complq->vport = vport;
-        }
-
-        for (i = 0; i < vport->num_rxq_grp; i++) {
-                struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i];
-                struct idpf_queue *q;
-                u16 num_rxq;
-                int j;
-
-                rx_qgrp->vport = vport;
-                for (j = 0; j < vport->num_bufqs_per_qgrp; j++)
-                        rx_qgrp->splitq.bufq_sets[j].bufq.vport = vport;
-
-                if (idpf_is_queue_model_split(vport->rxq_model))
-                        num_rxq = rx_qgrp->splitq.num_rxq_sets;
-                else
-                        num_rxq = rx_qgrp->singleq.num_rxq;
-
-                for (j = 0; j < num_rxq; j++) {
-                        if (idpf_is_queue_model_split(vport->rxq_model))
-                                q = &rx_qgrp->splitq.rxq_sets[j]->rxq;
-                        else
-                                q = rx_qgrp->singleq.rxqs[j];
-                        q->vport = vport;
-                }
-        }
-
         if (reset_cause == IDPF_SR_Q_CHANGE)
                 idpf_vport_alloc_vec_indexes(vport);
@@ -2393,24 +2351,10 @@ void idpf_free_dma_mem(struct idpf_hw *hw, struct idpf_dma_mem *mem)
         mem->pa = 0;
 }
 
-static const struct net_device_ops idpf_netdev_ops_splitq = {
-        .ndo_open = idpf_open,
-        .ndo_stop = idpf_stop,
-        .ndo_start_xmit = idpf_tx_splitq_start,
-        .ndo_features_check = idpf_features_check,
-        .ndo_set_rx_mode = idpf_set_rx_mode,
-        .ndo_validate_addr = eth_validate_addr,
-        .ndo_set_mac_address = idpf_set_mac,
-        .ndo_change_mtu = idpf_change_mtu,
-        .ndo_get_stats64 = idpf_get_stats64,
-        .ndo_set_features = idpf_set_features,
-        .ndo_tx_timeout = idpf_tx_timeout,
-};
-
-static const struct net_device_ops idpf_netdev_ops_singleq = {
+static const struct net_device_ops idpf_netdev_ops = {
         .ndo_open = idpf_open,
         .ndo_stop = idpf_stop,
-        .ndo_start_xmit = idpf_tx_singleq_start,
+        .ndo_start_xmit = idpf_tx_start,
         .ndo_features_check = idpf_features_check,
         .ndo_set_rx_mode = idpf_set_rx_mode,
         .ndo_validate_addr = eth_validate_addr,

@@ -8,6 +8,7 @@
 #define DRV_SUMMARY     "Intel(R) Infrastructure Data Path Function Linux Driver"
 
 MODULE_DESCRIPTION(DRV_SUMMARY);
+MODULE_IMPORT_NS(LIBETH);
 MODULE_LICENSE("GPL");
 
 /**
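
MODULE_IMPORT_NS(LIBETH) is needed because libeth exports its symbols into a
module namespace; without the import, modpost refuses to resolve them for
idpf. The two sides of that mechanism, sketched with an illustrative symbol
name:

/* In the library module (libeth) */
void libeth_example_helper(void)
{
}
EXPORT_SYMBOL_NS_GPL(libeth_example_helper, LIBETH);

/* In a consumer module such as idpf */
MODULE_IMPORT_NS(LIBETH);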

@@ -178,7 +178,8 @@ static void page_pool_struct_check(void)
         CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users);
         CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page);
         CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset);
-        CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag, 4 * sizeof(long));
+        CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag,
+                                    PAGE_POOL_FRAG_GROUP_ALIGN);
 }
 
 static int page_pool_init(struct page_pool *pool,
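
The page_pool hunk above belongs to the same series that adds
__cacheline_group_{begin,end}_aligned() and the libeth layout-assertion
helpers used to "strictly assert cachelines" of the new idpf queue structs.
The underlying pattern, shown here with the long-standing cache.h macros and
an illustrative struct (the new _aligned variants additionally pad each
group out to a cacheline boundary):

#include <linux/cache.h>
#include <linux/types.h>

struct example_rxq {
        __cacheline_group_begin(read_mostly);
        void __iomem            *tail;
        u32                     desc_count;
        __cacheline_group_end(read_mostly);

        __cacheline_group_begin(read_write);
        u32                     next_to_use;
        u32                     next_to_clean;
        __cacheline_group_end(read_write);
};

static void example_rxq_struct_check(void)
{
        /* Break the build if the hot fields move or the group grows */
        CACHELINE_ASSERT_GROUP_MEMBER(struct example_rxq, read_mostly, tail);
        CACHELINE_ASSERT_GROUP_MEMBER(struct example_rxq, read_mostly, desc_count);
        CACHELINE_ASSERT_GROUP_SIZE(struct example_rxq, read_mostly,
                                    sizeof(void *) + sizeof(u32));
}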