net: ravb: Allocate RX buffers via page pool

This patch makes multiple changes that can't be separated: 1) Allocate plain RX buffers via a page pool instead of allocating SKBs, then use build_skb() when a packet is received. 2) For GbEth IP, reduce the RX buffer size to 2kB. 3) For GbEth IP, merge packets which span more than one RX descriptor as SKB fragments instead of copying data. Implementing (1) without (2) would require the use of an order-1 page pool (instead of an order-0 page pool split into page fragments) for GbEth. Implementing (2) without (3) would leave us no space to re-assemble packets which span more than one RX descriptor. Implementing (3) without (1) would not be possible as the network stack expects to use put_page() or page_pool_put_page() to free SKB fragments after an SKB is consumed. RX checksum offload support is adjusted to handle both linear and nonlinear (fragmented) packets. This patch gives the following improvements during testing with iperf3. * RZ/G2L: * TCP RX: same bandwidth at -43% CPU load (70% -> 40%) * UDP RX: same bandwidth at -17% CPU load (88% -> 74%) * RZ/G2UL: * TCP RX: +30% bandwidth (726Mbps -> 941Mbps) * UDP RX: +417% bandwidth (108Mbps -> 558Mbps) * RZ/G3S: * TCP RX: +64% bandwidth (562Mbps -> 920Mbps) * UDP RX: +420% bandwidth (90Mbps -> 468Mbps) * RZ/Five: * TCP RX: +217% bandwidth (145Mbps -> 459Mbps) * UDP RX: +470% bandwidth (20Mbps -> 114Mbps) There is no significant impact on bandwidth or CPU load in testing on RZ/G2H or R-Car M3N. Signed-off-by: Paul Barker <paul.barker.ct@bp.renesas.com> Reviewed-by: Sergey Shtylyov <s.shtylyov@omp.ru> Signed-off-by: Paolo Abeni <pabeni@redhat.com>

net: ravb: Allocate RX buffers via page pool
This patch makes multiple changes that can't be separated: 1) Allocate plain RX buffers via a page pool instead of allocating SKBs, then use build_skb() when a packet is received. 2) For GbEth IP, reduce the RX buffer size to 2kB. 3) For GbEth IP, merge packets which span more than one RX descriptor as SKB fragments instead of copying data. Implementing (1) without (2) would require the use of an order-1 page pool (instead of an order-0 page pool split into page fragments) for GbEth. Implementing (2) without (3) would leave us no space to re-assemble packets which span more than one RX descriptor. Implementing (3) without (1) would not be possible as the network stack expects to use put_page() or page_pool_put_page() to free SKB fragments after an SKB is consumed. RX checksum offload support is adjusted to handle both linear and nonlinear (fragmented) packets. This patch gives the following improvements during testing with iperf3. * RZ/G2L: * TCP RX: same bandwidth at -43% CPU load (70% -> 40%) * UDP RX: same bandwidth at -17% CPU load (88% -> 74%) * RZ/G2UL: * TCP RX: +30% bandwidth (726Mbps -> 941Mbps) * UDP RX: +417% bandwidth (108Mbps -> 558Mbps) * RZ/G3S: * TCP RX: +64% bandwidth (562Mbps -> 920Mbps) * UDP RX: +420% bandwidth (90Mbps -> 468Mbps) * RZ/Five: * TCP RX: +217% bandwidth (145Mbps -> 459Mbps) * UDP RX: +470% bandwidth (20Mbps -> 114Mbps) There is no significant impact on bandwidth or CPU load in testing on RZ/G2H or R-Car M3N. Signed-off-by: Paul Barker <paul.barker.ct@bp.renesas.com> Reviewed-by: Sergey Shtylyov <s.shtylyov@omp.ru> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
96672632 · Paul Barker · Paolo Abeni · 65c482bc · 96672632 · 96672632
Commit 96672632 authored Jun 04, 2024 by Paul Barker Committed by Paolo Abeni Jun 06, 2024
Showing with 175 additions and 96 deletions

drivers/net/ethernet/renesas/ravb.h drivers/net/ethernet/renesas/ravb.h +9 -2

drivers/net/ethernet/renesas/ravb_main.c drivers/net/ethernet/renesas/ravb_main.c +166 -94

No files found.
--- a/drivers/net/ethernet/renesas/ravb.h
+++ b/drivers/net/ethernet/renesas/ravb.h
@@ -19,6 +19,7 @@
 #include <linux/phy.h>
 #include <linux/platform_device.h>
 #include <linux/ptp_clock_kernel.h>
+#include <net/page_pool/types.h>

 #define BE_TX_RING_SIZE	64	/* TX ring size for Best Effort */
 #define BE_RX_RING_SIZE	1024	/* RX ring size for Best Effort */
@@ -1051,7 +1052,7 @@ struct ravb_hw_info {
 	int stats_len;
 	u32 tccr_mask;
 	u32 rx_max_frame_size;
-	u32 rx_max_desc_use;
+	u32 rx_buffer_size;
 	u32 rx_desc_size;
 	unsigned aligned_tx: 1;
 	unsigned coalesce_irqs:1;	/* Needs software IRQ coalescing */
@@ -1071,6 +1072,11 @@ struct ravb_hw_info {
 	unsigned half_duplex:1;		/* E-MAC supports half duplex mode */
 };

+struct ravb_rx_buffer {
+	struct page *page;
+	unsigned int offset;
+};
+
 struct ravb_private {
 	struct net_device *ndev;
 	struct platform_device *pdev;
@@ -1094,7 +1100,8 @@ struct ravb_private {
 	struct ravb_tx_desc *tx_ring[NUM_TX_QUEUE];
 	void *tx_align[NUM_TX_QUEUE];
 	struct sk_buff *rx_1st_skb;
-	struct sk_buff **rx_skb[NUM_RX_QUEUE];
+	struct page_pool *rx_pool[NUM_RX_QUEUE];
+	struct ravb_rx_buffer *rx_buffers[NUM_RX_QUEUE];
 	struct sk_buff **tx_skb[NUM_TX_QUEUE];
 	u32 rx_over_errors;
 	u32 rx_fifo_errors;

--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c