Commit 17af2c47 authored by David S. Miller

Merge branch 'sfc-prerequisites-for-EF100-driver-part-1'

Edward Cree says:

====================
sfc: prerequisites for EF100 driver, part 1

This continues the work started by Alex Maftei <amaftei@solarflare.com>
 in the series "sfc: code refactoring", "sfc: more code refactoring",
 "sfc: even more code refactoring" and "sfc: refactor mcdi filtering
 code", to prepare for a new driver which will share much of the code
 to support the new EF100 family of Solarflare/Xilinx NICs.
After this series, there will be approximately two more of these
 'prerequisites' series, followed by the sfc_ef100 driver itself.

v2: fix reverse xmas tree in patch 5.  (Left the cases in patches 7,
 9 and 14 alone as those are all in pure movement of existing code.)
====================
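"Reverse xmas tree", mentioned in the v2 note above, is the netdev convention of ordering a function's local variable declarations from the longest line down to the shortest. A minimal illustrative sketch (the driver types are real, the function itself is made up):

static void efx_example(struct efx_nic *efx)
{
	struct efx_ef10_nic_data *nic_data = efx->nic_data;
	unsigned int channel_vis;
	int rc;

	/* declarations above run longest line to shortest; body follows */
}
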
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 5f035af7 4d9c0a2d
......@@ -282,7 +282,10 @@ typedef union efx_oword {
field7, value7, \
field8, value8, \
field9, value9, \
field10, value10) \
field10, value10, \
field11, value11, \
field12, value12, \
field13, value13) \
(EFX_INSERT_FIELD_NATIVE((min), (max), field1, (value1)) | \
EFX_INSERT_FIELD_NATIVE((min), (max), field2, (value2)) | \
EFX_INSERT_FIELD_NATIVE((min), (max), field3, (value3)) | \
......@@ -292,7 +295,10 @@ typedef union efx_oword {
EFX_INSERT_FIELD_NATIVE((min), (max), field7, (value7)) | \
EFX_INSERT_FIELD_NATIVE((min), (max), field8, (value8)) | \
EFX_INSERT_FIELD_NATIVE((min), (max), field9, (value9)) | \
EFX_INSERT_FIELD_NATIVE((min), (max), field10, (value10)))
EFX_INSERT_FIELD_NATIVE((min), (max), field10, (value10)) | \
EFX_INSERT_FIELD_NATIVE((min), (max), field11, (value11)) | \
EFX_INSERT_FIELD_NATIVE((min), (max), field12, (value12)) | \
EFX_INSERT_FIELD_NATIVE((min), (max), field13, (value13)))
#define EFX_INSERT_FIELDS64(...) \
cpu_to_le64(EFX_INSERT_FIELDS_NATIVE(__VA_ARGS__))
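/* Illustrative sketch (not part of this patch): the populate macros in
 * the next hunk reach the new 13-pair form by prepending dummy
 * field/value pairs, so existing callers keep whatever arity they need.
 * The register and field names below are illustrative only.
 */
efx_oword_t txd;

EFX_POPULATE_OWORD_2(txd,
		     FRF_AZ_TX_DESCQ_EN, 1,
		     FRF_AZ_TX_DESCQ_SIZE, 3);

/* Each EFX_POPULATE_OWORD_<n> prepends "EFX_DUMMY_FIELD, 0" and invokes
 * EFX_POPULATE_OWORD_<n+1>, so the call above chains up to the 13-pair
 * EFX_POPULATE_OWORD_13 (an alias for EFX_POPULATE_OWORD); EFX_DUMMY_FIELD
 * is a zero-width placeholder defined elsewhere in this header, so the
 * padding pairs contribute no bits.
 */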
......@@ -334,7 +340,13 @@ typedef union efx_oword {
#endif
/* Populate an octword field with various numbers of arguments */
#define EFX_POPULATE_OWORD_10 EFX_POPULATE_OWORD
#define EFX_POPULATE_OWORD_13 EFX_POPULATE_OWORD
#define EFX_POPULATE_OWORD_12(oword, ...) \
EFX_POPULATE_OWORD_13(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_OWORD_11(oword, ...) \
EFX_POPULATE_OWORD_12(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_OWORD_10(oword, ...) \
EFX_POPULATE_OWORD_11(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_OWORD_9(oword, ...) \
EFX_POPULATE_OWORD_10(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_OWORD_8(oword, ...) \
......@@ -363,7 +375,13 @@ typedef union efx_oword {
EFX_DWORD_3, 0xffffffff)
/* Populate a quadword field with various numbers of arguments */
#define EFX_POPULATE_QWORD_10 EFX_POPULATE_QWORD
#define EFX_POPULATE_QWORD_13 EFX_POPULATE_QWORD
#define EFX_POPULATE_QWORD_12(qword, ...) \
EFX_POPULATE_QWORD_13(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_QWORD_11(qword, ...) \
EFX_POPULATE_QWORD_12(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_QWORD_10(qword, ...) \
EFX_POPULATE_QWORD_11(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_QWORD_9(qword, ...) \
EFX_POPULATE_QWORD_10(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_QWORD_8(qword, ...) \
......@@ -390,7 +408,13 @@ typedef union efx_oword {
EFX_DWORD_1, 0xffffffff)
/* Populate a dword field with various numbers of arguments */
#define EFX_POPULATE_DWORD_10 EFX_POPULATE_DWORD
#define EFX_POPULATE_DWORD_13 EFX_POPULATE_DWORD
#define EFX_POPULATE_DWORD_12(dword, ...) \
EFX_POPULATE_DWORD_13(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_DWORD_11(dword, ...) \
EFX_POPULATE_DWORD_12(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_DWORD_10(dword, ...) \
EFX_POPULATE_DWORD_11(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_DWORD_9(dword, ...) \
EFX_POPULATE_DWORD_10(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_DWORD_8(dword, ...) \
......@@ -601,10 +601,14 @@ static int efx_ef10_probe(struct efx_nic *efx)
* However, until we use TX option descriptors we need two TX queues
* per channel.
*/
efx->max_channels = min_t(unsigned int,
EFX_MAX_CHANNELS,
efx_ef10_mem_map_size(efx) /
(efx->vi_stride * EFX_TXQ_TYPES));
efx->max_vis = efx_ef10_mem_map_size(efx) / efx->vi_stride;
if (!efx->max_vis) {
netif_err(efx, drv, efx->net_dev, "error determining max VIs\n");
rc = -EIO;
goto fail5;
}
efx->max_channels = min_t(unsigned int, EFX_MAX_CHANNELS,
efx->max_vis / EFX_TXQ_TYPES);
efx->max_tx_channels = efx->max_channels;
if (WARN_ON(efx->max_channels == 0)) {
rc = -EIO;
......@@ -1129,6 +1133,12 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
((efx->n_tx_channels + efx->n_extra_tx_channels) *
EFX_TXQ_TYPES) +
efx->n_xdp_channels * efx->xdp_tx_per_channel);
if (efx->max_vis && efx->max_vis < channel_vis) {
netif_dbg(efx, drv, efx->net_dev,
"Reducing channel VIs from %u to %u\n",
channel_vis, efx->max_vis);
channel_vis = efx->max_vis;
}
#ifdef EFX_USE_PIO
/* Try to allocate PIO buffers if wanted and if the full
......@@ -1269,6 +1279,14 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
return 0;
}
static void efx_ef10_fini_nic(struct efx_nic *efx)
{
struct efx_ef10_nic_data *nic_data = efx->nic_data;
kfree(nic_data->mc_stats);
nic_data->mc_stats = NULL;
}
static int efx_ef10_init_nic(struct efx_nic *efx)
{
struct efx_ef10_nic_data *nic_data = efx->nic_data;
......@@ -1290,6 +1308,11 @@ static int efx_ef10_init_nic(struct efx_nic *efx)
efx->must_realloc_vis = false;
}
nic_data->mc_stats = kmalloc(efx->num_mac_stats * sizeof(__le64),
GFP_KERNEL);
if (!nic_data->mc_stats)
return -ENOMEM;
if (nic_data->must_restore_piobufs && nic_data->n_piobufs) {
rc = efx_ef10_alloc_piobufs(efx, nic_data->n_piobufs);
if (rc == 0) {
......@@ -1410,8 +1433,6 @@ static int efx_ef10_reset(struct efx_nic *efx, enum reset_type reset_type)
{ NULL, 64, 8 * MC_CMD_MAC_ ## mcdi_name }
#define EF10_OTHER_STAT(ext_name) \
[EF10_STAT_ ## ext_name] = { #ext_name, 0, 0 }
#define GENERIC_SW_STAT(ext_name) \
[GENERIC_STAT_ ## ext_name] = { #ext_name, 0, 0 }
static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = {
EF10_DMA_STAT(port_tx_bytes, TX_BYTES),
......@@ -1455,8 +1476,8 @@ static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = {
EF10_DMA_STAT(port_rx_align_error, RX_ALIGN_ERROR_PKTS),
EF10_DMA_STAT(port_rx_length_error, RX_LENGTH_ERROR_PKTS),
EF10_DMA_STAT(port_rx_nodesc_drops, RX_NODESC_DROPS),
GENERIC_SW_STAT(rx_nodesc_trunc),
GENERIC_SW_STAT(rx_noskb_drops),
EFX_GENERIC_SW_STAT(rx_nodesc_trunc),
EFX_GENERIC_SW_STAT(rx_noskb_drops),
EF10_DMA_STAT(port_rx_pm_trunc_bb_overflow, PM_TRUNC_BB_OVERFLOW),
EF10_DMA_STAT(port_rx_pm_discard_bb_overflow, PM_DISCARD_BB_OVERFLOW),
EF10_DMA_STAT(port_rx_pm_trunc_vfifo_full, PM_TRUNC_VFIFO_FULL),
......@@ -1765,55 +1786,42 @@ static size_t efx_ef10_update_stats_common(struct efx_nic *efx, u64 *full_stats,
return stats_count;
}
static int efx_ef10_try_update_nic_stats_pf(struct efx_nic *efx)
static size_t efx_ef10_update_stats_pf(struct efx_nic *efx, u64 *full_stats,
struct rtnl_link_stats64 *core_stats)
{
struct efx_ef10_nic_data *nic_data = efx->nic_data;
DECLARE_BITMAP(mask, EF10_STAT_COUNT);
__le64 generation_start, generation_end;
u64 *stats = nic_data->stats;
__le64 *dma_stats;
efx_ef10_get_stat_mask(efx, mask);
dma_stats = efx->stats_buffer.addr;
generation_end = dma_stats[efx->num_mac_stats - 1];
if (generation_end == EFX_MC_STATS_GENERATION_INVALID)
return 0;
rmb();
efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, mask,
stats, efx->stats_buffer.addr, false);
rmb();
generation_start = dma_stats[MC_CMD_MAC_GENERATION_START];
if (generation_end != generation_start)
return -EAGAIN;
efx_nic_copy_stats(efx, nic_data->mc_stats);
efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT,
mask, stats, nic_data->mc_stats, false);
/* Update derived statistics */
efx_nic_fix_nodesc_drop_stat(efx,
&stats[EF10_STAT_port_rx_nodesc_drops]);
/* MC Firmware reads RX_BYTES and RX_GOOD_BYTES from the MAC.
* It then calculates RX_BAD_BYTES and DMAs it to us with RX_BYTES.
* We report these as port_rx_ stats. We are not given RX_GOOD_BYTES.
* Here we calculate port_rx_good_bytes.
*/
stats[EF10_STAT_port_rx_good_bytes] =
stats[EF10_STAT_port_rx_bytes] -
stats[EF10_STAT_port_rx_bytes_minus_good_bytes];
/* The asynchronous reads used to calculate RX_BAD_BYTES in
* MC Firmware are done such that we should not see an increase in
* RX_BAD_BYTES when a good packet has arrived. Unfortunately this
* does mean that the stat can decrease at times. Here we do not
* update the stat unless it has increased or has gone to zero
* (In the case of the NIC rebooting).
* Please see Bug 33781 for a discussion of why things work this way.
*/
efx_update_diff_stat(&stats[EF10_STAT_port_rx_bad_bytes],
stats[EF10_STAT_port_rx_bytes_minus_good_bytes]);
efx_update_sw_stats(efx, stats);
return 0;
}
static size_t efx_ef10_update_stats_pf(struct efx_nic *efx, u64 *full_stats,
struct rtnl_link_stats64 *core_stats)
{
int retry;
/* If we're unlucky enough to read statistics during the DMA, wait
* up to 10ms for it to finish (typically takes <500us)
*/
for (retry = 0; retry < 100; ++retry) {
if (efx_ef10_try_update_nic_stats_pf(efx) == 0)
break;
udelay(100);
}
return efx_ef10_update_stats_common(efx, full_stats, core_stats);
}
......@@ -3109,14 +3117,6 @@ static void efx_ef10_ev_test_generate(struct efx_channel *channel)
netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
}
void efx_ef10_handle_drain_event(struct efx_nic *efx)
{
if (atomic_dec_and_test(&efx->active_queues))
wake_up(&efx->flush_wq);
WARN_ON(atomic_read(&efx->active_queues) < 0);
}
static int efx_ef10_fini_dmaq(struct efx_nic *efx)
{
struct efx_tx_queue *tx_queue;
......@@ -4023,7 +4023,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
.remove = efx_ef10_remove,
.dimension_resources = efx_ef10_dimension_resources,
.init = efx_ef10_init_nic,
.fini = efx_port_dummy_op_void,
.fini = efx_ef10_fini_nic,
.map_reset_reason = efx_ef10_map_reset_reason,
.map_reset_flags = efx_ef10_map_reset_flags,
.reset = efx_ef10_reset,
......@@ -4132,7 +4132,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
.remove = efx_ef10_remove,
.dimension_resources = efx_ef10_dimension_resources,
.init = efx_ef10_init_nic,
.fini = efx_port_dummy_op_void,
.fini = efx_ef10_fini_nic,
.map_reset_reason = efx_ef10_map_reset_reason,
.map_reset_flags = efx_ef10_map_reset_flags,
.reset = efx_ef10_reset,
......@@ -133,30 +133,6 @@ static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
*
**************************************************************************/
/* Equivalent to efx_link_set_advertising with all-zeroes, except does not
* force the Autoneg bit on.
*/
void efx_link_clear_advertising(struct efx_nic *efx)
{
bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS);
efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
}
void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
{
efx->wanted_fc = wanted_fc;
if (efx->link_advertising[0]) {
if (wanted_fc & EFX_FC_RX)
efx->link_advertising[0] |= (ADVERTISED_Pause |
ADVERTISED_Asym_Pause);
else
efx->link_advertising[0] &= ~(ADVERTISED_Pause |
ADVERTISED_Asym_Pause);
if (wanted_fc & EFX_FC_TX)
efx->link_advertising[0] ^= ADVERTISED_Asym_Pause;
}
}
static void efx_fini_port(struct efx_nic *efx);
static int efx_probe_port(struct efx_nic *efx)
......@@ -1098,7 +1074,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
efx_pci_remove_main(efx);
efx_fini_io(efx, efx->type->mem_bar(efx));
efx_fini_io(efx);
netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n");
efx_fini_struct(efx);
......@@ -1366,7 +1342,7 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
return 0;
fail3:
efx_fini_io(efx, efx->type->mem_bar(efx));
efx_fini_io(efx);
fail2:
efx_fini_struct(efx);
fail1:
......@@ -1514,97 +1490,6 @@ static const struct dev_pm_ops efx_pm_ops = {
.restore = efx_pm_resume,
};
/* A PCI error affecting this device was detected.
* At this point MMIO and DMA may be disabled.
* Stop the software path and request a slot reset.
*/
static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
enum pci_channel_state state)
{
pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
struct efx_nic *efx = pci_get_drvdata(pdev);
if (state == pci_channel_io_perm_failure)
return PCI_ERS_RESULT_DISCONNECT;
rtnl_lock();
if (efx->state != STATE_DISABLED) {
efx->state = STATE_RECOVERY;
efx->reset_pending = 0;
efx_device_detach_sync(efx);
efx_stop_all(efx);
efx_disable_interrupts(efx);
status = PCI_ERS_RESULT_NEED_RESET;
} else {
/* If the interface is disabled we don't want to do anything
* with it.
*/
status = PCI_ERS_RESULT_RECOVERED;
}
rtnl_unlock();
pci_disable_device(pdev);
return status;
}
/* Fake a successful reset, which will be performed later in efx_io_resume. */
static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev)
{
struct efx_nic *efx = pci_get_drvdata(pdev);
pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
if (pci_enable_device(pdev)) {
netif_err(efx, hw, efx->net_dev,
"Cannot re-enable PCI device after reset.\n");
status = PCI_ERS_RESULT_DISCONNECT;
}
return status;
}
/* Perform the actual reset and resume I/O operations. */
static void efx_io_resume(struct pci_dev *pdev)
{
struct efx_nic *efx = pci_get_drvdata(pdev);
int rc;
rtnl_lock();
if (efx->state == STATE_DISABLED)
goto out;
rc = efx_reset(efx, RESET_TYPE_ALL);
if (rc) {
netif_err(efx, hw, efx->net_dev,
"efx_reset failed after PCI error (%d)\n", rc);
} else {
efx->state = STATE_READY;
netif_dbg(efx, hw, efx->net_dev,
"Done resetting and resuming IO after PCI error.\n");
}
out:
rtnl_unlock();
}
/* For simplicity and reliability, we always require a slot reset and try to
* reset the hardware when a pci error affecting the device is detected.
* We leave both the link_reset and mmio_enabled callback unimplemented:
* with our request for slot reset the mmio_enabled callback will never be
* called, and the link_reset callback is not used by AER or EEH mechanisms.
*/
static const struct pci_error_handlers efx_err_handlers = {
.error_detected = efx_io_error_detected,
.slot_reset = efx_io_slot_reset,
.resume = efx_io_resume,
};
static struct pci_driver efx_pci_driver = {
.name = KBUILD_MODNAME,
.id_table = efx_pci_table,
......@@ -147,11 +147,6 @@ static inline s32 efx_filter_get_rx_ids(struct efx_nic *efx,
{
return efx->type->filter_get_rx_ids(efx, priority, buf, size);
}
#ifdef CONFIG_RFS_ACCEL
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota);
#endif
/* RSS contexts */
static inline bool efx_rss_active(struct efx_rss_context *ctx)
......@@ -216,9 +211,6 @@ static inline void efx_schedule_channel_irq(struct efx_channel *channel)
efx_schedule_channel(channel);
}
void efx_link_clear_advertising(struct efx_nic *efx);
void efx_link_set_wanted_fc(struct efx_nic *efx, u8);
static inline void efx_device_detach_sync(struct efx_nic *efx)
{
struct net_device *dev = efx->net_dev;
......@@ -175,6 +175,13 @@ static int efx_allocate_msix_channels(struct efx_nic *efx,
efx->n_xdp_channels = 0;
efx->xdp_tx_per_channel = 0;
efx->xdp_tx_queue_count = 0;
} else if (n_channels + n_xdp_tx > efx->max_vis) {
netif_err(efx, drv, efx->net_dev,
"Insufficient resources for %d XDP TX queues (%d other channels, max VIs %d)\n",
n_xdp_tx, n_channels, efx->max_vis);
efx->n_xdp_channels = 0;
efx->xdp_tx_per_channel = 0;
efx->xdp_tx_queue_count = 0;
} else {
efx->n_xdp_channels = n_xdp_ev;
efx->xdp_tx_per_channel = EFX_TXQ_TYPES;
......@@ -383,6 +383,30 @@ static void efx_stop_datapath(struct efx_nic *efx)
*
**************************************************************************/
/* Equivalent to efx_link_set_advertising with all-zeroes, except does not
* force the Autoneg bit on.
*/
void efx_link_clear_advertising(struct efx_nic *efx)
{
bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS);
efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
}
void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
{
efx->wanted_fc = wanted_fc;
if (efx->link_advertising[0]) {
if (wanted_fc & EFX_FC_RX)
efx->link_advertising[0] |= (ADVERTISED_Pause |
ADVERTISED_Asym_Pause);
else
efx->link_advertising[0] &= ~(ADVERTISED_Pause |
ADVERTISED_Asym_Pause);
if (wanted_fc & EFX_FC_TX)
efx->link_advertising[0] ^= ADVERTISED_Asym_Pause;
}
}
static void efx_start_port(struct efx_nic *efx)
{
netif_dbg(efx, ifup, efx->net_dev, "start port\n");
......@@ -929,6 +953,8 @@ int efx_init_struct(struct efx_nic *efx,
INIT_WORK(&efx->mac_work, efx_mac_work);
init_waitqueue_head(&efx->flush_wq);
efx->mem_bar = UINT_MAX;
rc = efx_init_channels(efx);
if (rc)
goto fail;
......@@ -972,7 +998,9 @@ int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
struct pci_dev *pci_dev = efx->pci_dev;
int rc;
netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");
efx->mem_bar = UINT_MAX;
netif_dbg(efx, probe, efx->net_dev, "initialising I/O bar=%d\n", bar);
rc = pci_enable_device(pci_dev);
if (rc) {
......@@ -1014,21 +1042,21 @@ int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
rc = pci_request_region(pci_dev, bar, "sfc");
if (rc) {
netif_err(efx, probe, efx->net_dev,
"request for memory BAR failed\n");
"request for memory BAR[%d] failed\n", bar);
rc = -EIO;
goto fail3;
}
efx->mem_bar = bar;
efx->membase = ioremap(efx->membase_phys, mem_map_size);
if (!efx->membase) {
netif_err(efx, probe, efx->net_dev,
"could not map memory BAR at %llx+%x\n",
"could not map memory BAR[%d] at %llx+%x\n", bar,
(unsigned long long)efx->membase_phys, mem_map_size);
rc = -ENOMEM;
goto fail4;
}
netif_dbg(efx, probe, efx->net_dev,
"memory BAR at %llx+%x (virtual %p)\n",
"memory BAR[%d] at %llx+%x (virtual %p)\n", bar,
(unsigned long long)efx->membase_phys, mem_map_size,
efx->membase);
......@@ -1044,7 +1072,7 @@ int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
return rc;
}
void efx_fini_io(struct efx_nic *efx, int bar)
void efx_fini_io(struct efx_nic *efx)
{
netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");
......@@ -1054,8 +1082,9 @@ void efx_fini_io(struct efx_nic *efx, int bar)
}
if (efx->membase_phys) {
pci_release_region(efx->pci_dev, bar);
pci_release_region(efx->pci_dev, efx->mem_bar);
efx->membase_phys = 0;
efx->mem_bar = UINT_MAX;
}
/* Don't disable bus-mastering if VFs are assigned */
......@@ -1101,3 +1130,94 @@ void efx_fini_mcdi_logging(struct efx_nic *efx)
device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
}
#endif
/* A PCI error affecting this device was detected.
* At this point MMIO and DMA may be disabled.
* Stop the software path and request a slot reset.
*/
static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
enum pci_channel_state state)
{
pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
struct efx_nic *efx = pci_get_drvdata(pdev);
if (state == pci_channel_io_perm_failure)
return PCI_ERS_RESULT_DISCONNECT;
rtnl_lock();
if (efx->state != STATE_DISABLED) {
efx->state = STATE_RECOVERY;
efx->reset_pending = 0;
efx_device_detach_sync(efx);
efx_stop_all(efx);
efx_disable_interrupts(efx);
status = PCI_ERS_RESULT_NEED_RESET;
} else {
/* If the interface is disabled we don't want to do anything
* with it.
*/
status = PCI_ERS_RESULT_RECOVERED;
}
rtnl_unlock();
pci_disable_device(pdev);
return status;
}
/* Fake a successful reset, which will be performed later in efx_io_resume. */
static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev)
{
struct efx_nic *efx = pci_get_drvdata(pdev);
pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
if (pci_enable_device(pdev)) {
netif_err(efx, hw, efx->net_dev,
"Cannot re-enable PCI device after reset.\n");
status = PCI_ERS_RESULT_DISCONNECT;
}
return status;
}
/* Perform the actual reset and resume I/O operations. */
static void efx_io_resume(struct pci_dev *pdev)
{
struct efx_nic *efx = pci_get_drvdata(pdev);
int rc;
rtnl_lock();
if (efx->state == STATE_DISABLED)
goto out;
rc = efx_reset(efx, RESET_TYPE_ALL);
if (rc) {
netif_err(efx, hw, efx->net_dev,
"efx_reset failed after PCI error (%d)\n", rc);
} else {
efx->state = STATE_READY;
netif_dbg(efx, hw, efx->net_dev,
"Done resetting and resuming IO after PCI error.\n");
}
out:
rtnl_unlock();
}
/* For simplicity and reliability, we always require a slot reset and try to
* reset the hardware when a pci error affecting the device is detected.
* We leave both the link_reset and mmio_enabled callback unimplemented:
* with our request for slot reset the mmio_enabled callback will never be
* called, and the link_reset callback is not used by AER or EEH mechanisms.
*/
const struct pci_error_handlers efx_err_handlers = {
.error_detected = efx_io_error_detected,
.slot_reset = efx_io_slot_reset,
.resume = efx_io_resume,
};
......@@ -13,11 +13,14 @@
int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
unsigned int mem_map_size);
void efx_fini_io(struct efx_nic *efx, int bar);
void efx_fini_io(struct efx_nic *efx);
int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev,
struct net_device *net_dev);
void efx_fini_struct(struct efx_nic *efx);
void efx_link_clear_advertising(struct efx_nic *efx);
void efx_link_set_wanted_fc(struct efx_nic *efx, u8);
void efx_start_all(struct efx_nic *efx);
void efx_stop_all(struct efx_nic *efx);
......@@ -70,4 +73,5 @@ void efx_link_status_changed(struct efx_nic *efx);
unsigned int efx_xdp_max_mtu(struct efx_nic *efx);
int efx_change_mtu(struct net_device *net_dev, int new_mtu);
extern const struct pci_error_handlers efx_err_handlers;
#endif
......@@ -15,8 +15,13 @@ void efx_ethtool_get_drvinfo(struct net_device *net_dev,
struct ethtool_drvinfo *info);
u32 efx_ethtool_get_msglevel(struct net_device *net_dev);
void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable);
void efx_ethtool_self_test(struct net_device *net_dev,
struct ethtool_test *test, u64 *data);
int efx_ethtool_nway_reset(struct net_device *net_dev);
void efx_ethtool_get_pauseparam(struct net_device *net_dev,
struct ethtool_pauseparam *pause);
int efx_ethtool_set_pauseparam(struct net_device *net_dev,
struct ethtool_pauseparam *pause);
int efx_ethtool_fill_self_tests(struct efx_nic *efx,
struct efx_self_tests *tests,
u8 *strings, u64 *data);
......@@ -26,5 +31,34 @@ void efx_ethtool_get_strings(struct net_device *net_dev, u32 string_set,
void efx_ethtool_get_stats(struct net_device *net_dev,
struct ethtool_stats *stats __attribute__ ((unused)),
u64 *data);
int efx_ethtool_get_link_ksettings(struct net_device *net_dev,
struct ethtool_link_ksettings *out);
int efx_ethtool_set_link_ksettings(struct net_device *net_dev,
const struct ethtool_link_ksettings *settings);
int efx_ethtool_get_fecparam(struct net_device *net_dev,
struct ethtool_fecparam *fecparam);
int efx_ethtool_set_fecparam(struct net_device *net_dev,
struct ethtool_fecparam *fecparam);
int efx_ethtool_get_rxnfc(struct net_device *net_dev,
struct ethtool_rxnfc *info, u32 *rule_locs);
int efx_ethtool_set_rxnfc(struct net_device *net_dev,
struct ethtool_rxnfc *info);
u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev);
u32 efx_ethtool_get_rxfh_key_size(struct net_device *net_dev);
int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key,
u8 *hfunc);
int efx_ethtool_set_rxfh(struct net_device *net_dev,
const u32 *indir, const u8 *key, const u8 hfunc);
int efx_ethtool_get_rxfh_context(struct net_device *net_dev, u32 *indir,
u8 *key, u8 *hfunc, u32 rss_context);
int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
const u32 *indir, const u8 *key,
const u8 hfunc, u32 *rss_context,
bool delete);
int efx_ethtool_reset(struct net_device *net_dev, u32 *flags);
int efx_ethtool_get_module_eeprom(struct net_device *net_dev,
struct ethtool_eeprom *ee,
u8 *data);
int efx_ethtool_get_module_info(struct net_device *net_dev,
struct ethtool_modinfo *modinfo);
#endif
......@@ -1299,6 +1299,14 @@ static void efx_mcdi_abandon(struct efx_nic *efx)
efx_schedule_reset(efx, RESET_TYPE_MCDI_TIMEOUT);
}
static void efx_handle_drain_event(struct efx_nic *efx)
{
if (atomic_dec_and_test(&efx->active_queues))
wake_up(&efx->flush_wq);
WARN_ON(atomic_read(&efx->active_queues) < 0);
}
/* Called from efx_farch_ev_process and efx_ef10_ev_process for MCDI events */
void efx_mcdi_process_event(struct efx_channel *channel,
efx_qword_t *event)
......@@ -1371,7 +1379,7 @@ void efx_mcdi_process_event(struct efx_channel *channel,
BUILD_BUG_ON(MCDI_EVENT_TX_FLUSH_TO_DRIVER_LBN !=
MCDI_EVENT_RX_FLUSH_TO_DRIVER_LBN);
if (!MCDI_EVENT_FIELD(*event, TX_FLUSH_TO_DRIVER))
efx_ef10_handle_drain_event(efx);
efx_handle_drain_event(efx);
break;
case MCDI_EVENT_CODE_TX_ERR:
case MCDI_EVENT_CODE_RX_ERR:
......@@ -332,10 +332,9 @@ void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev);
#define MCDI_CAPABILITY_OFST(field) \
MC_CMD_GET_CAPABILITIES_V4_OUT_ ## field ## _OFST
/* field is FLAGS1 or FLAGS2 */
#define efx_has_cap(efx, flag, field) \
#define efx_has_cap(efx, field) \
efx->type->check_caps(efx, \
MCDI_CAPABILITY(flag), \
MCDI_CAPABILITY(field), \
MCDI_CAPABILITY_OFST(field))
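/* Illustrative sketch (not part of this patch): with the flag argument
 * dropped, the capability name alone selects both the bit and the flags
 * word that contains it. Assuming MCDI_CAPABILITY() token-pastes the
 * field name into the matching _LBN definition (defined outside the
 * shown context), efx_has_cap(efx, VXLAN_NVGRE) resolves roughly to:
 */
efx->type->check_caps(efx,
		      MC_CMD_GET_CAPABILITIES_V4_OUT_VXLAN_NVGRE_LBN,
		      MC_CMD_GET_CAPABILITIES_V4_OUT_VXLAN_NVGRE_OFST);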
void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len);
......@@ -828,7 +828,7 @@ static int efx_mcdi_filter_insert_def(struct efx_nic *efx,
efx_filter_set_uc_def(&spec);
if (encap_type) {
if (efx_has_cap(efx, VXLAN_NVGRE, FLAGS1))
if (efx_has_cap(efx, VXLAN_NVGRE))
efx_filter_set_encap_type(&spec, encap_type);
else
/*
......@@ -1304,7 +1304,7 @@ int efx_mcdi_filter_table_probe(struct efx_nic *efx, bool multicast_chaining)
rc = efx_mcdi_filter_table_probe_matches(efx, table, false);
if (rc)
goto fail;
if (efx_has_cap(efx, VXLAN_NVGRE, FLAGS1))
if (efx_has_cap(efx, VXLAN_NVGRE))
rc = efx_mcdi_filter_table_probe_matches(efx, table, true);
if (rc)
goto fail;
......@@ -1927,7 +1927,7 @@ static int efx_mcdi_filter_alloc_rss_context(struct efx_nic *efx, bool exclusive
return 0;
}
if (efx_has_cap(efx, RX_RSS_LIMITED, FLAGS1))
if (efx_has_cap(efx, RX_RSS_LIMITED))
return -EOPNOTSUPP;
MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_UPSTREAM_PORT_ID,
......@@ -1948,7 +1948,7 @@ static int efx_mcdi_filter_alloc_rss_context(struct efx_nic *efx, bool exclusive
if (context_size)
*context_size = rss_spread;
if (efx_has_cap(efx, ADDITIONAL_RSS_MODES, FLAGS1))
if (efx_has_cap(efx, ADDITIONAL_RSS_MODES))
efx_mcdi_set_rss_context_flags(efx, ctx);
return 0;
......@@ -961,6 +961,7 @@ struct efx_async_filter_insertion {
* @vpd_sn: Serial number read from VPD
* @xdp_rxq_info_failed: Have any of the rx queues failed to initialise their
* xdp_rxq_info structures?
* @mem_bar: The BAR that is mapped into membase.
* @monitor_work: Hardware monitor workitem
* @biu_lock: BIU (bus interface unit) lock
* @last_irq_cpu: Last CPU to handle a possible test interrupt. This
......@@ -1022,6 +1023,7 @@ struct efx_nic {
unsigned next_buffer_table;
unsigned int max_channels;
unsigned int max_vis;
unsigned int max_tx_channels;
unsigned n_channels;
unsigned n_rx_channels;
......@@ -1136,6 +1138,8 @@ struct efx_nic {
char *vpd_sn;
bool xdp_rxq_info_failed;
unsigned int mem_bar;
/* The following fields may be written more often */
struct delayed_work monitor_work ____cacheline_aligned_in_smp;
......@@ -20,6 +20,8 @@
#include "farch_regs.h"
#include "io.h"
#include "workarounds.h"
#include "mcdi_port_common.h"
#include "mcdi_pcol.h"
/**************************************************************************
*
......@@ -470,6 +472,49 @@ size_t efx_nic_describe_stats(const struct efx_hw_stat_desc *desc, size_t count,
return visible;
}
/**
* efx_nic_copy_stats - Copy stats from the DMA buffer into an
* intermediate buffer. This is used to get a consistent
* set of stats while the DMA buffer can be written at any time
* by the NIC.
* @efx: The associated NIC.
* @dest: Destination buffer. Must be the same size as the DMA buffer.
*/
int efx_nic_copy_stats(struct efx_nic *efx, __le64 *dest)
{
__le64 *dma_stats = efx->stats_buffer.addr;
__le64 generation_start, generation_end;
int rc = 0, retry;
if (!dest)
return 0;
if (!dma_stats)
goto return_zeroes;
/* If we're unlucky enough to read statistics during the DMA, wait
* up to 10ms for it to finish (typically takes <500us)
*/
for (retry = 0; retry < 100; ++retry) {
generation_end = dma_stats[efx->num_mac_stats - 1];
if (generation_end == EFX_MC_STATS_GENERATION_INVALID)
goto return_zeroes;
rmb();
memcpy(dest, dma_stats, efx->num_mac_stats * sizeof(__le64));
rmb();
generation_start = dma_stats[MC_CMD_MAC_GENERATION_START];
if (generation_end == generation_start)
return 0; /* return good data */
udelay(100);
}
rc = -EIO;
return_zeroes:
memset(dest, 0, efx->num_mac_stats * sizeof(u64));
return rc;
}
/**
* efx_nic_update_stats - Convert statistics DMA buffer to array of u64
* @desc: Array of &struct efx_hw_stat_desc describing the DMA buffer
/* SPDX-License-Identifier: GPL-2.0-only */
/****************************************************************************
* Driver for Solarflare network controllers and boards
* Copyright 2005-2006 Fen Systems Ltd.
* Copyright 2006-2013 Solarflare Communications Inc.
* Copyright 2019-2020 Xilinx Inc.
*/
#ifndef EFX_NIC_COMMON_H
#define EFX_NIC_COMMON_H
#include "net_driver.h"
#include "efx_common.h"
#include "mcdi.h"
#include "ptp.h"
enum {
/* Revisions 0-2 were Falcon A0, A1 and B0 respectively.
* They are not supported by this driver but these revision numbers
* form part of the ethtool API for register dumping.
*/
EFX_REV_SIENA_A0 = 3,
EFX_REV_HUNT_A0 = 4,
};
static inline int efx_nic_rev(struct efx_nic *efx)
{
return efx->type->revision;
}
/* Read the current event from the event queue */
static inline efx_qword_t *efx_event(struct efx_channel *channel,
unsigned int index)
{
return ((efx_qword_t *) (channel->eventq.buf.addr)) +
(index & channel->eventq_mask);
}
/* See if an event is present
*
* We check both the high and low dword of the event for all ones. We
* wrote all ones when we cleared the event, and no valid event can
* have all ones in either its high or low dwords. This approach is
* robust against reordering.
*
* Note that using a single 64-bit comparison is incorrect; even
* though the CPU read will be atomic, the DMA write may not be.
*/
static inline int efx_event_present(efx_qword_t *event)
{
return !(EFX_DWORD_IS_ALL_ONES(event->dword[0]) |
EFX_DWORD_IS_ALL_ONES(event->dword[1]));
}
/* Returns a pointer to the specified transmit descriptor in the TX
* descriptor queue belonging to the specified channel.
*/
static inline efx_qword_t *
efx_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index)
{
return ((efx_qword_t *) (tx_queue->txd.buf.addr)) + index;
}
/* Report whether this TX queue would be empty for the given write_count.
* May return false negative.
*/
static inline bool __efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue,
unsigned int write_count)
{
unsigned int empty_read_count = READ_ONCE(tx_queue->empty_read_count);
if (empty_read_count == 0)
return false;
return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0;
}
/* Report whether the NIC considers this TX queue empty, using
* packet_write_count (the write count recorded for the last completable
* doorbell push). May return false negative. EF10 only, which is OK
* because only EF10 supports PIO.
*/
static inline bool efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue)
{
EFX_WARN_ON_ONCE_PARANOID(!tx_queue->efx->type->option_descriptors);
return __efx_nic_tx_is_empty(tx_queue, tx_queue->packet_write_count);
}
/* Get partner of a TX queue, seen as part of the same net core queue */
/* XXX is this a thing on EF100? */
static inline struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue)
{
if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD)
return tx_queue - EFX_TXQ_TYPE_OFFLOAD;
else
return tx_queue + EFX_TXQ_TYPE_OFFLOAD;
}
/* Decide whether we can use TX PIO, ie. write packet data directly into
* a buffer on the device. This can reduce latency at the expense of
* throughput, so we only do this if both hardware and software TX rings
* are empty. This also ensures that only one packet at a time can be
* using the PIO buffer.
*/
static inline bool efx_nic_may_tx_pio(struct efx_tx_queue *tx_queue)
{
struct efx_tx_queue *partner = efx_tx_queue_partner(tx_queue);
return tx_queue->piobuf && efx_nic_tx_is_empty(tx_queue) &&
efx_nic_tx_is_empty(partner);
}
/* Decide whether to push a TX descriptor to the NIC vs merely writing
* the doorbell. This can reduce latency when we are adding a single
* descriptor to an empty queue, but is otherwise pointless. Further,
* Falcon and Siena have hardware bugs (SF bug 33851) that may be
* triggered if we don't check this.
* We use the write_count used for the last doorbell push, to get the
* NIC's view of the tx queue.
*/
static inline bool efx_nic_may_push_tx_desc(struct efx_tx_queue *tx_queue,
unsigned int write_count)
{
bool was_empty = __efx_nic_tx_is_empty(tx_queue, write_count);
tx_queue->empty_read_count = 0;
return was_empty && tx_queue->write_count - write_count == 1;
}
/* Returns a pointer to the specified descriptor in the RX descriptor queue */
static inline efx_qword_t *
efx_rx_desc(struct efx_rx_queue *rx_queue, unsigned int index)
{
return ((efx_qword_t *) (rx_queue->rxd.buf.addr)) + index;
}
/* Alignment of PCIe DMA boundaries (4KB) */
#define EFX_PAGE_SIZE 4096
/* Size and alignment of buffer table entries (same) */
#define EFX_BUF_SIZE EFX_PAGE_SIZE
/* NIC-generic software stats */
enum {
GENERIC_STAT_rx_noskb_drops,
GENERIC_STAT_rx_nodesc_trunc,
GENERIC_STAT_COUNT
};
#define EFX_GENERIC_SW_STAT(ext_name) \
[GENERIC_STAT_ ## ext_name] = { #ext_name, 0, 0 }
/* TX data path */
static inline int efx_nic_probe_tx(struct efx_tx_queue *tx_queue)
{
return tx_queue->efx->type->tx_probe(tx_queue);
}
static inline void efx_nic_init_tx(struct efx_tx_queue *tx_queue)
{
tx_queue->efx->type->tx_init(tx_queue);
}
static inline void efx_nic_remove_tx(struct efx_tx_queue *tx_queue)
{
tx_queue->efx->type->tx_remove(tx_queue);
}
static inline void efx_nic_push_buffers(struct efx_tx_queue *tx_queue)
{
tx_queue->efx->type->tx_write(tx_queue);
}
/* RX data path */
static inline int efx_nic_probe_rx(struct efx_rx_queue *rx_queue)
{
return rx_queue->efx->type->rx_probe(rx_queue);
}
static inline void efx_nic_init_rx(struct efx_rx_queue *rx_queue)
{
rx_queue->efx->type->rx_init(rx_queue);
}
static inline void efx_nic_remove_rx(struct efx_rx_queue *rx_queue)
{
rx_queue->efx->type->rx_remove(rx_queue);
}
static inline void efx_nic_notify_rx_desc(struct efx_rx_queue *rx_queue)
{
rx_queue->efx->type->rx_write(rx_queue);
}
static inline void efx_nic_generate_fill_event(struct efx_rx_queue *rx_queue)
{
rx_queue->efx->type->rx_defer_refill(rx_queue);
}
/* Event data path */
static inline int efx_nic_probe_eventq(struct efx_channel *channel)
{
return channel->efx->type->ev_probe(channel);
}
static inline int efx_nic_init_eventq(struct efx_channel *channel)
{
return channel->efx->type->ev_init(channel);
}
static inline void efx_nic_fini_eventq(struct efx_channel *channel)
{
channel->efx->type->ev_fini(channel);
}
static inline void efx_nic_remove_eventq(struct efx_channel *channel)
{
channel->efx->type->ev_remove(channel);
}
static inline int
efx_nic_process_eventq(struct efx_channel *channel, int quota)
{
return channel->efx->type->ev_process(channel, quota);
}
static inline void efx_nic_eventq_read_ack(struct efx_channel *channel)
{
channel->efx->type->ev_read_ack(channel);
}
void efx_nic_event_test_start(struct efx_channel *channel);
bool efx_nic_event_present(struct efx_channel *channel);
/* Some statistics are computed as A - B where A and B each increase
* linearly with some hardware counter(s) and the counters are read
* asynchronously. If the counters contributing to B are always read
* after those contributing to A, the computed value may be lower than
* the true value by some variable amount, and may decrease between
* subsequent computations.
*
* We should never allow statistics to decrease or to exceed the true
* value. Since the computed value will never be greater than the
* true value, we can achieve this by only storing the computed value
* when it increases.
*/
static inline void efx_update_diff_stat(u64 *stat, u64 diff)
{
if ((s64)(diff - *stat) > 0)
*stat = diff;
}
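/* Worked example (not part of this patch): if a previous update stored
 * *stat = 1000 and a racing read then yields diff = 998, the test
 * (s64)(998 - 1000) is negative and the stored value stays at 1000;
 * only a strictly larger diff (1001 or more) replaces it, so the
 * reported statistic never decreases.
 */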
/* Interrupts */
int efx_nic_init_interrupt(struct efx_nic *efx);
int efx_nic_irq_test_start(struct efx_nic *efx);
void efx_nic_fini_interrupt(struct efx_nic *efx);
static inline int efx_nic_event_test_irq_cpu(struct efx_channel *channel)
{
return READ_ONCE(channel->event_test_cpu);
}
static inline int efx_nic_irq_test_irq_cpu(struct efx_nic *efx)
{
return READ_ONCE(efx->last_irq_cpu);
}
/* Global Resources */
int efx_nic_alloc_buffer(struct efx_nic *efx, struct efx_buffer *buffer,
unsigned int len, gfp_t gfp_flags);
void efx_nic_free_buffer(struct efx_nic *efx, struct efx_buffer *buffer);
size_t efx_nic_get_regs_len(struct efx_nic *efx);
void efx_nic_get_regs(struct efx_nic *efx, void *buf);
size_t efx_nic_describe_stats(const struct efx_hw_stat_desc *desc, size_t count,
const unsigned long *mask, u8 *names);
int efx_nic_copy_stats(struct efx_nic *efx, __le64 *dest);
void efx_nic_update_stats(const struct efx_hw_stat_desc *desc, size_t count,
const unsigned long *mask, u64 *stats,
const void *dma_buf, bool accumulate);
void efx_nic_fix_nodesc_drop_stat(struct efx_nic *efx, u64 *stat);
#define EFX_MAX_FLUSH_TIME 5000
#endif /* EFX_NIC_COMMON_H */
......@@ -35,7 +35,6 @@
#include <linux/time.h>
#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/net_tstamp.h>
#include <linux/pps_kernel.h>
#include <linux/ptp_clock_kernel.h>
#include "net_driver.h"
......@@ -44,7 +43,7 @@
#include "mcdi_pcol.h"
#include "io.h"
#include "farch_regs.h"
#include "nic.h"
#include "nic.h" /* indirectly includes ptp.h */
/* Maximum number of events expected to make up a PTP event */
#define MAX_EVENT_FRAGS 3
......@@ -352,7 +351,7 @@ static int efx_phc_enable(struct ptp_clock_info *ptp,
bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx)
{
return efx_has_cap(efx, TX_MAC_TIMESTAMPING, FLAGS2);
return efx_has_cap(efx, TX_MAC_TIMESTAMPING);
}
/* PTP 'extra' channel is still a traffic channel, but we only create TX queues
/* SPDX-License-Identifier: GPL-2.0-only */
/****************************************************************************
* Driver for Solarflare network controllers and boards
* Copyright 2005-2006 Fen Systems Ltd.
* Copyright 2006-2013 Solarflare Communications Inc.
* Copyright 2019-2020 Xilinx Inc.
*/
#ifndef EFX_PTP_H
#define EFX_PTP_H
#include <linux/net_tstamp.h>
#include "net_driver.h"
struct ethtool_ts_info;
int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel);
void efx_ptp_defer_probe_with_channel(struct efx_nic *efx);
struct efx_channel *efx_ptp_channel(struct efx_nic *efx);
void efx_ptp_remove(struct efx_nic *efx);
int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr);
int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr);
void efx_ptp_get_ts_info(struct efx_nic *efx, struct ethtool_ts_info *ts_info);
bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb);
int efx_ptp_get_mode(struct efx_nic *efx);
int efx_ptp_change_mode(struct efx_nic *efx, bool enable_wanted,
unsigned int new_mode);
int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb);
void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev);
size_t efx_ptp_describe_stats(struct efx_nic *efx, u8 *strings);
size_t efx_ptp_update_stats(struct efx_nic *efx, u64 *stats);
void efx_time_sync_event(struct efx_channel *channel, efx_qword_t *ev);
void __efx_rx_skb_attach_timestamp(struct efx_channel *channel,
struct sk_buff *skb);
static inline void efx_rx_skb_attach_timestamp(struct efx_channel *channel,
struct sk_buff *skb)
{
if (channel->sync_events_state == SYNC_EVENTS_VALID)
__efx_rx_skb_attach_timestamp(channel, skb);
}
void efx_ptp_start_datapath(struct efx_nic *efx);
void efx_ptp_stop_datapath(struct efx_nic *efx);
bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx);
ktime_t efx_ptp_nic_to_kernel_time(struct efx_tx_queue *tx_queue);
#endif /* EFX_PTP_H */
......@@ -411,243 +411,9 @@ void __efx_rx_packet(struct efx_channel *channel)
rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;
if ((rx_buf->flags & EFX_RX_PKT_TCP) && !channel->type->receive_skb)
efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh);
efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh, 0);
else
efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
out:
channel->rx_pkt_n_frags = 0;
}
#ifdef CONFIG_RFS_ACCEL
static void efx_filter_rfs_work(struct work_struct *data)
{
struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
work);
struct efx_nic *efx = netdev_priv(req->net_dev);
struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
int slot_idx = req - efx->rps_slot;
struct efx_arfs_rule *rule;
u16 arfs_id = 0;
int rc;
rc = efx->type->filter_insert(efx, &req->spec, true);
if (rc >= 0)
/* Discard 'priority' part of EF10+ filter ID (mcdi_filters) */
rc %= efx->type->max_rx_ip_filters;
if (efx->rps_hash_table) {
spin_lock_bh(&efx->rps_hash_lock);
rule = efx_rps_hash_find(efx, &req->spec);
/* The rule might have already gone, if someone else's request
* for the same spec was already worked and then expired before
* we got around to our work. In that case we have nothing
* tying us to an arfs_id, meaning that as soon as the filter
* is considered for expiry it will be removed.
*/
if (rule) {
if (rc < 0)
rule->filter_id = EFX_ARFS_FILTER_ID_ERROR;
else
rule->filter_id = rc;
arfs_id = rule->arfs_id;
}
spin_unlock_bh(&efx->rps_hash_lock);
}
if (rc >= 0) {
/* Remember this so we can check whether to expire the filter
* later.
*/
mutex_lock(&efx->rps_mutex);
if (channel->rps_flow_id[rc] == RPS_FLOW_ID_INVALID)
channel->rfs_filter_count++;
channel->rps_flow_id[rc] = req->flow_id;
mutex_unlock(&efx->rps_mutex);
if (req->spec.ether_type == htons(ETH_P_IP))
netif_info(efx, rx_status, efx->net_dev,
"steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc, arfs_id);
else
netif_info(efx, rx_status, efx->net_dev,
"steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc, arfs_id);
channel->n_rfs_succeeded++;
} else {
if (req->spec.ether_type == htons(ETH_P_IP))
netif_dbg(efx, rx_status, efx->net_dev,
"failed to steer %s %pI4:%u:%pI4:%u to queue %u [flow %u rc %d id %u]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc, arfs_id);
else
netif_dbg(efx, rx_status, efx->net_dev,
"failed to steer %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u rc %d id %u]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc, arfs_id);
channel->n_rfs_failed++;
/* We're overloading the NIC's filter tables, so let's do a
* chunk of extra expiry work.
*/
__efx_filter_rfs_expire(channel, min(channel->rfs_filter_count,
100u));
}
/* Release references */
clear_bit(slot_idx, &efx->rps_slot_map);
dev_put(req->net_dev);
}
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id)
{
struct efx_nic *efx = netdev_priv(net_dev);
struct efx_async_filter_insertion *req;
struct efx_arfs_rule *rule;
struct flow_keys fk;
int slot_idx;
bool new;
int rc;
/* find a free slot */
for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++)
if (!test_and_set_bit(slot_idx, &efx->rps_slot_map))
break;
if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT)
return -EBUSY;
if (flow_id == RPS_FLOW_ID_INVALID) {
rc = -EINVAL;
goto out_clear;
}
if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) {
rc = -EPROTONOSUPPORT;
goto out_clear;
}
if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) {
rc = -EPROTONOSUPPORT;
goto out_clear;
}
if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) {
rc = -EPROTONOSUPPORT;
goto out_clear;
}
req = efx->rps_slot + slot_idx;
efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
rxq_index);
req->spec.match_flags =
EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
req->spec.ether_type = fk.basic.n_proto;
req->spec.ip_proto = fk.basic.ip_proto;
if (fk.basic.n_proto == htons(ETH_P_IP)) {
req->spec.rem_host[0] = fk.addrs.v4addrs.src;
req->spec.loc_host[0] = fk.addrs.v4addrs.dst;
} else {
memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src,
sizeof(struct in6_addr));
memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst,
sizeof(struct in6_addr));
}
req->spec.rem_port = fk.ports.src;
req->spec.loc_port = fk.ports.dst;
if (efx->rps_hash_table) {
/* Add it to ARFS hash table */
spin_lock(&efx->rps_hash_lock);
rule = efx_rps_hash_add(efx, &req->spec, &new);
if (!rule) {
rc = -ENOMEM;
goto out_unlock;
}
if (new)
rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER;
rc = rule->arfs_id;
/* Skip if existing or pending filter already does the right thing */
if (!new && rule->rxq_index == rxq_index &&
rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING)
goto out_unlock;
rule->rxq_index = rxq_index;
rule->filter_id = EFX_ARFS_FILTER_ID_PENDING;
spin_unlock(&efx->rps_hash_lock);
} else {
/* Without an ARFS hash table, we just use arfs_id 0 for all
* filters. This means if multiple flows hash to the same
* flow_id, all but the most recently touched will be eligible
* for expiry.
*/
rc = 0;
}
/* Queue the request */
dev_hold(req->net_dev = net_dev);
INIT_WORK(&req->work, efx_filter_rfs_work);
req->rxq_index = rxq_index;
req->flow_id = flow_id;
schedule_work(&req->work);
return rc;
out_unlock:
spin_unlock(&efx->rps_hash_lock);
out_clear:
clear_bit(slot_idx, &efx->rps_slot_map);
return rc;
}
bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota)
{
bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index);
struct efx_nic *efx = channel->efx;
unsigned int index, size, start;
u32 flow_id;
if (!mutex_trylock(&efx->rps_mutex))
return false;
expire_one = efx->type->filter_rfs_expire_one;
index = channel->rfs_expire_index;
start = index;
size = efx->type->max_rx_ip_filters;
while (quota) {
flow_id = channel->rps_flow_id[index];
if (flow_id != RPS_FLOW_ID_INVALID) {
quota--;
if (expire_one(efx, flow_id, index)) {
netif_info(efx, rx_status, efx->net_dev,
"expired filter %d [channel %u flow %u]\n",
index, channel->channel, flow_id);
channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID;
channel->rfs_filter_count--;
}
}
if (++index == size)
index = 0;
/* If we were called with a quota that exceeds the total number
* of filters in the table (which shouldn't happen, but could
* if two callers race), ensure that we don't loop forever -
* stop when we've examined every row of the table.
*/
if (index == start)
break;
}
channel->rfs_expire_index = index;
mutex_unlock(&efx->rps_mutex);
return true;
}
#endif /* CONFIG_RFS_ACCEL */
......@@ -510,7 +510,7 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic)
*/
void
efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
unsigned int n_frags, u8 *eh)
unsigned int n_frags, u8 *eh, __wsum csum)
{
struct napi_struct *napi = &channel->napi_str;
struct efx_nic *efx = channel->efx;
......@@ -528,8 +528,13 @@ efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
if (efx->net_dev->features & NETIF_F_RXHASH)
skb_set_hash(skb, efx_rx_buf_hash(efx, eh),
PKT_HASH_TYPE_L3);
if (csum) {
skb->csum = csum;
skb->ip_summed = CHECKSUM_COMPLETE;
} else {
skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
}
skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
for (;;) {
......@@ -849,3 +854,237 @@ void efx_remove_filters(struct efx_nic *efx)
efx->type->filter_table_remove(efx);
up_write(&efx->filter_sem);
}
#ifdef CONFIG_RFS_ACCEL
static void efx_filter_rfs_work(struct work_struct *data)
{
struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
work);
struct efx_nic *efx = netdev_priv(req->net_dev);
struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
int slot_idx = req - efx->rps_slot;
struct efx_arfs_rule *rule;
u16 arfs_id = 0;
int rc;
rc = efx->type->filter_insert(efx, &req->spec, true);
if (rc >= 0)
/* Discard 'priority' part of EF10+ filter ID (mcdi_filters) */
rc %= efx->type->max_rx_ip_filters;
if (efx->rps_hash_table) {
spin_lock_bh(&efx->rps_hash_lock);
rule = efx_rps_hash_find(efx, &req->spec);
/* The rule might have already gone, if someone else's request
* for the same spec was already worked and then expired before
* we got around to our work. In that case we have nothing
* tying us to an arfs_id, meaning that as soon as the filter
* is considered for expiry it will be removed.
*/
if (rule) {
if (rc < 0)
rule->filter_id = EFX_ARFS_FILTER_ID_ERROR;
else
rule->filter_id = rc;
arfs_id = rule->arfs_id;
}
spin_unlock_bh(&efx->rps_hash_lock);
}
if (rc >= 0) {
/* Remember this so we can check whether to expire the filter
* later.
*/
mutex_lock(&efx->rps_mutex);
if (channel->rps_flow_id[rc] == RPS_FLOW_ID_INVALID)
channel->rfs_filter_count++;
channel->rps_flow_id[rc] = req->flow_id;
mutex_unlock(&efx->rps_mutex);
if (req->spec.ether_type == htons(ETH_P_IP))
netif_info(efx, rx_status, efx->net_dev,
"steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc, arfs_id);
else
netif_info(efx, rx_status, efx->net_dev,
"steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc, arfs_id);
channel->n_rfs_succeeded++;
} else {
if (req->spec.ether_type == htons(ETH_P_IP))
netif_dbg(efx, rx_status, efx->net_dev,
"failed to steer %s %pI4:%u:%pI4:%u to queue %u [flow %u rc %d id %u]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc, arfs_id);
else
netif_dbg(efx, rx_status, efx->net_dev,
"failed to steer %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u rc %d id %u]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc, arfs_id);
channel->n_rfs_failed++;
/* We're overloading the NIC's filter tables, so let's do a
* chunk of extra expiry work.
*/
__efx_filter_rfs_expire(channel, min(channel->rfs_filter_count,
100u));
}
/* Release references */
clear_bit(slot_idx, &efx->rps_slot_map);
dev_put(req->net_dev);
}
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id)
{
struct efx_nic *efx = netdev_priv(net_dev);
struct efx_async_filter_insertion *req;
struct efx_arfs_rule *rule;
struct flow_keys fk;
int slot_idx;
bool new;
int rc;
/* find a free slot */
for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++)
if (!test_and_set_bit(slot_idx, &efx->rps_slot_map))
break;
if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT)
return -EBUSY;
if (flow_id == RPS_FLOW_ID_INVALID) {
rc = -EINVAL;
goto out_clear;
}
if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) {
rc = -EPROTONOSUPPORT;
goto out_clear;
}
if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) {
rc = -EPROTONOSUPPORT;
goto out_clear;
}
if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) {
rc = -EPROTONOSUPPORT;
goto out_clear;
}
req = efx->rps_slot + slot_idx;
efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
rxq_index);
req->spec.match_flags =
EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
req->spec.ether_type = fk.basic.n_proto;
req->spec.ip_proto = fk.basic.ip_proto;
if (fk.basic.n_proto == htons(ETH_P_IP)) {
req->spec.rem_host[0] = fk.addrs.v4addrs.src;
req->spec.loc_host[0] = fk.addrs.v4addrs.dst;
} else {
memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src,
sizeof(struct in6_addr));
memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst,
sizeof(struct in6_addr));
}
req->spec.rem_port = fk.ports.src;
req->spec.loc_port = fk.ports.dst;
if (efx->rps_hash_table) {
/* Add it to ARFS hash table */
spin_lock(&efx->rps_hash_lock);
rule = efx_rps_hash_add(efx, &req->spec, &new);
if (!rule) {
rc = -ENOMEM;
goto out_unlock;
}
if (new)
rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER;
rc = rule->arfs_id;
/* Skip if existing or pending filter already does the right thing */
if (!new && rule->rxq_index == rxq_index &&
rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING)
goto out_unlock;
rule->rxq_index = rxq_index;
rule->filter_id = EFX_ARFS_FILTER_ID_PENDING;
spin_unlock(&efx->rps_hash_lock);
} else {
/* Without an ARFS hash table, we just use arfs_id 0 for all
* filters. This means if multiple flows hash to the same
* flow_id, all but the most recently touched will be eligible
* for expiry.
*/
rc = 0;
}
/* Queue the request */
dev_hold(req->net_dev = net_dev);
INIT_WORK(&req->work, efx_filter_rfs_work);
req->rxq_index = rxq_index;
req->flow_id = flow_id;
schedule_work(&req->work);
return rc;
out_unlock:
spin_unlock(&efx->rps_hash_lock);
out_clear:
clear_bit(slot_idx, &efx->rps_slot_map);
return rc;
}
bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota)
{
bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index);
struct efx_nic *efx = channel->efx;
unsigned int index, size, start;
u32 flow_id;
if (!mutex_trylock(&efx->rps_mutex))
return false;
expire_one = efx->type->filter_rfs_expire_one;
index = channel->rfs_expire_index;
start = index;
size = efx->type->max_rx_ip_filters;
while (quota) {
flow_id = channel->rps_flow_id[index];
if (flow_id != RPS_FLOW_ID_INVALID) {
quota--;
if (expire_one(efx, flow_id, index)) {
netif_info(efx, rx_status, efx->net_dev,
"expired filter %d [channel %u flow %u]\n",
index, channel->channel, flow_id);
channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID;
channel->rfs_filter_count--;
}
}
if (++index == size)
index = 0;
/* If we were called with a quota that exceeds the total number
* of filters in the table (which shouldn't happen, but could
* if two callers race), ensure that we don't loop forever -
* stop when we've examined every row of the table.
*/
if (index == start)
break;
}
channel->rfs_expire_index = index;
mutex_unlock(&efx->rps_mutex);
return true;
}
#endif /* CONFIG_RFS_ACCEL */
......@@ -67,7 +67,7 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic);
void
efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
unsigned int n_frags, u8 *eh);
unsigned int n_frags, u8 *eh, __wsum csum);
struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx);
struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id);
......@@ -89,6 +89,10 @@ struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
const struct efx_filter_spec *spec,
bool *new);
void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec);
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota);
#endif
int efx_probe_filters(struct efx_nic *efx);
......@@ -276,6 +276,7 @@ static int siena_probe_nic(struct efx_nic *efx)
}
efx->max_channels = EFX_MAX_CHANNELS;
efx->max_vis = EFX_MAX_CHANNELS;
efx->max_tx_channels = EFX_MAX_CHANNELS;
efx_reado(efx, &reg, FR_AZ_CS_DEBUG);