Commit 17af2c47 authored by David S. Miller

Merge branch 'sfc-prerequisites-for-EF100-driver-part-1'

Edward Cree says:

====================
sfc: prerequisites for EF100 driver, part 1

This continues the work started by Alex Maftei <amaftei@solarflare.com>
 in the series "sfc: code refactoring", "sfc: more code refactoring",
 "sfc: even more code refactoring" and "sfc: refactor mcdi filtering
 code", to prepare for a new driver which will share much of the code
 to support the new EF100 family of Solarflare/Xilinx NICs.
After this series, there will be approximately two more of these
 'prerequisites' series, followed by the sfc_ef100 driver itself.

v2: fix reverse xmas tree in patch 5.  (Left the cases in patches 7,
 9 and 14 alone as those are all pure movement of existing code.)
====================
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 5f035af7 4d9c0a2d
@@ -282,7 +282,10 @@ typedef union efx_oword {
field7, value7, \
field8, value8, \
field9, value9, \
field10, value10) \
field10, value10, \
field11, value11, \
field12, value12, \
field13, value13) \
(EFX_INSERT_FIELD_NATIVE((min), (max), field1, (value1)) | \
EFX_INSERT_FIELD_NATIVE((min), (max), field2, (value2)) | \
EFX_INSERT_FIELD_NATIVE((min), (max), field3, (value3)) | \
@@ -292,7 +295,10 @@ typedef union efx_oword {
EFX_INSERT_FIELD_NATIVE((min), (max), field7, (value7)) | \
EFX_INSERT_FIELD_NATIVE((min), (max), field8, (value8)) | \
EFX_INSERT_FIELD_NATIVE((min), (max), field9, (value9)) | \
EFX_INSERT_FIELD_NATIVE((min), (max), field10, (value10)))
EFX_INSERT_FIELD_NATIVE((min), (max), field10, (value10)) | \
EFX_INSERT_FIELD_NATIVE((min), (max), field11, (value11)) | \
EFX_INSERT_FIELD_NATIVE((min), (max), field12, (value12)) | \
EFX_INSERT_FIELD_NATIVE((min), (max), field13, (value13)))
#define EFX_INSERT_FIELDS64(...) \
cpu_to_le64(EFX_INSERT_FIELDS_NATIVE(__VA_ARGS__))
@@ -334,7 +340,13 @@ typedef union efx_oword {
#endif
/* Populate an octword field with various numbers of arguments */
#define EFX_POPULATE_OWORD_10 EFX_POPULATE_OWORD
#define EFX_POPULATE_OWORD_13 EFX_POPULATE_OWORD
#define EFX_POPULATE_OWORD_12(oword, ...) \
EFX_POPULATE_OWORD_13(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_OWORD_11(oword, ...) \
EFX_POPULATE_OWORD_12(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_OWORD_10(oword, ...) \
EFX_POPULATE_OWORD_11(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_OWORD_9(oword, ...) \
EFX_POPULATE_OWORD_10(oword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_OWORD_8(oword, ...) \
@@ -363,7 +375,13 @@ typedef union efx_oword {
EFX_DWORD_3, 0xffffffff)
/* Populate a quadword field with various numbers of arguments */
#define EFX_POPULATE_QWORD_10 EFX_POPULATE_QWORD
#define EFX_POPULATE_QWORD_13 EFX_POPULATE_QWORD
#define EFX_POPULATE_QWORD_12(qword, ...) \
EFX_POPULATE_QWORD_13(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_QWORD_11(qword, ...) \
EFX_POPULATE_QWORD_12(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_QWORD_10(qword, ...) \
EFX_POPULATE_QWORD_11(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_QWORD_9(qword, ...) \
EFX_POPULATE_QWORD_10(qword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_QWORD_8(qword, ...) \
@@ -390,7 +408,13 @@ typedef union efx_oword {
EFX_DWORD_1, 0xffffffff)
/* Populate a dword field with various numbers of arguments */
#define EFX_POPULATE_DWORD_10 EFX_POPULATE_DWORD
#define EFX_POPULATE_DWORD_13 EFX_POPULATE_DWORD
#define EFX_POPULATE_DWORD_12(dword, ...) \
EFX_POPULATE_DWORD_13(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_DWORD_11(dword, ...) \
EFX_POPULATE_DWORD_12(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_DWORD_10(dword, ...) \
EFX_POPULATE_DWORD_11(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_DWORD_9(dword, ...) \
EFX_POPULATE_DWORD_10(dword, EFX_DUMMY_FIELD, 0, __VA_ARGS__)
#define EFX_POPULATE_DWORD_8(dword, ...) \
......
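The EFX_POPULATE_*_n helpers above all funnel into the widest (now 13-field) variant by padding the argument list with EFX_DUMMY_FIELD/0 pairs, so only one full implementation is needed. A minimal userspace sketch of the same padding trick, using made-up POP_* macros rather than the driver's EFX_* ones:

#include <stdio.h>

#define DUMMY_FIELD 0
/* Only the widest variant does real work; narrower ones pad and forward. */
#define POP_3(out, f1, v1, f2, v2, f3, v3) \
	((out) = ((v1) << (f1)) | ((v2) << (f2)) | ((v3) << (f3)))
#define POP_2(out, ...) POP_3(out, DUMMY_FIELD, 0, __VA_ARGS__)
#define POP_1(out, ...) POP_2(out, DUMMY_FIELD, 0, __VA_ARGS__)

int main(void)
{
	unsigned int reg;

	POP_1(reg, 4, 3);	/* expands to POP_3(reg, 0, 0, 0, 0, 4, 3) */
	printf("%#x\n", reg);	/* prints 0x30 */
	return 0;
}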
@@ -601,10 +601,14 @@ static int efx_ef10_probe(struct efx_nic *efx)
* However, until we use TX option descriptors we need two TX queues
* per channel.
*/
efx->max_channels = min_t(unsigned int,
EFX_MAX_CHANNELS,
efx_ef10_mem_map_size(efx) /
(efx->vi_stride * EFX_TXQ_TYPES));
efx->max_vis = efx_ef10_mem_map_size(efx) / efx->vi_stride;
if (!efx->max_vis) {
netif_err(efx, drv, efx->net_dev, "error determining max VIs\n");
rc = -EIO;
goto fail5;
}
efx->max_channels = min_t(unsigned int, EFX_MAX_CHANNELS,
efx->max_vis / EFX_TXQ_TYPES);
efx->max_tx_channels = efx->max_channels;
if (WARN_ON(efx->max_channels == 0)) {
rc = -EIO;
@@ -1129,6 +1133,12 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
((efx->n_tx_channels + efx->n_extra_tx_channels) *
EFX_TXQ_TYPES) +
efx->n_xdp_channels * efx->xdp_tx_per_channel);
if (efx->max_vis && efx->max_vis < channel_vis) {
netif_dbg(efx, drv, efx->net_dev,
"Reducing channel VIs from %u to %u\n",
channel_vis, efx->max_vis);
channel_vis = efx->max_vis;
}
#ifdef EFX_USE_PIO
/* Try to allocate PIO buffers if wanted and if the full
@@ -1269,6 +1279,14 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
return 0;
}
static void efx_ef10_fini_nic(struct efx_nic *efx)
{
struct efx_ef10_nic_data *nic_data = efx->nic_data;
kfree(nic_data->mc_stats);
nic_data->mc_stats = NULL;
}
static int efx_ef10_init_nic(struct efx_nic *efx)
{
struct efx_ef10_nic_data *nic_data = efx->nic_data;
@@ -1290,6 +1308,11 @@ static int efx_ef10_init_nic(struct efx_nic *efx)
efx->must_realloc_vis = false;
}
nic_data->mc_stats = kmalloc(efx->num_mac_stats * sizeof(__le64),
GFP_KERNEL);
if (!nic_data->mc_stats)
return -ENOMEM;
if (nic_data->must_restore_piobufs && nic_data->n_piobufs) {
rc = efx_ef10_alloc_piobufs(efx, nic_data->n_piobufs);
if (rc == 0) {
@@ -1410,8 +1433,6 @@ static int efx_ef10_reset(struct efx_nic *efx, enum reset_type reset_type)
{ NULL, 64, 8 * MC_CMD_MAC_ ## mcdi_name }
#define EF10_OTHER_STAT(ext_name) \
[EF10_STAT_ ## ext_name] = { #ext_name, 0, 0 }
#define GENERIC_SW_STAT(ext_name) \
[GENERIC_STAT_ ## ext_name] = { #ext_name, 0, 0 }
static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = {
EF10_DMA_STAT(port_tx_bytes, TX_BYTES),
@@ -1455,8 +1476,8 @@ static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = {
EF10_DMA_STAT(port_rx_align_error, RX_ALIGN_ERROR_PKTS),
EF10_DMA_STAT(port_rx_length_error, RX_LENGTH_ERROR_PKTS),
EF10_DMA_STAT(port_rx_nodesc_drops, RX_NODESC_DROPS),
GENERIC_SW_STAT(rx_nodesc_trunc),
GENERIC_SW_STAT(rx_noskb_drops),
EFX_GENERIC_SW_STAT(rx_nodesc_trunc),
EFX_GENERIC_SW_STAT(rx_noskb_drops),
EF10_DMA_STAT(port_rx_pm_trunc_bb_overflow, PM_TRUNC_BB_OVERFLOW),
EF10_DMA_STAT(port_rx_pm_discard_bb_overflow, PM_DISCARD_BB_OVERFLOW),
EF10_DMA_STAT(port_rx_pm_trunc_vfifo_full, PM_TRUNC_VFIFO_FULL),
@@ -1765,55 +1786,42 @@ static size_t efx_ef10_update_stats_common(struct efx_nic *efx, u64 *full_stats,
return stats_count;
}
static int efx_ef10_try_update_nic_stats_pf(struct efx_nic *efx)
static size_t efx_ef10_update_stats_pf(struct efx_nic *efx, u64 *full_stats,
struct rtnl_link_stats64 *core_stats)
{
struct efx_ef10_nic_data *nic_data = efx->nic_data;
DECLARE_BITMAP(mask, EF10_STAT_COUNT);
__le64 generation_start, generation_end;
u64 *stats = nic_data->stats;
__le64 *dma_stats;
efx_ef10_get_stat_mask(efx, mask);
dma_stats = efx->stats_buffer.addr;
generation_end = dma_stats[efx->num_mac_stats - 1];
if (generation_end == EFX_MC_STATS_GENERATION_INVALID)
return 0;
rmb();
efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT, mask,
stats, efx->stats_buffer.addr, false);
rmb();
generation_start = dma_stats[MC_CMD_MAC_GENERATION_START];
if (generation_end != generation_start)
return -EAGAIN;
efx_nic_copy_stats(efx, nic_data->mc_stats);
efx_nic_update_stats(efx_ef10_stat_desc, EF10_STAT_COUNT,
mask, stats, nic_data->mc_stats, false);
/* Update derived statistics */
efx_nic_fix_nodesc_drop_stat(efx,
&stats[EF10_STAT_port_rx_nodesc_drops]);
/* MC Firmware reads RX_BYTES and RX_GOOD_BYTES from the MAC.
* It then calculates RX_BAD_BYTES and DMAs it to us with RX_BYTES.
* We report these as port_rx_ stats. We are not given RX_GOOD_BYTES.
* Here we calculate port_rx_good_bytes.
*/
stats[EF10_STAT_port_rx_good_bytes] =
stats[EF10_STAT_port_rx_bytes] -
stats[EF10_STAT_port_rx_bytes_minus_good_bytes];
/* The asynchronous reads used to calculate RX_BAD_BYTES in
* MC Firmware are done such that we should not see an increase in
* RX_BAD_BYTES when a good packet has arrived. Unfortunately this
* does mean that the stat can decrease at times. Here we do not
* update the stat unless it has increased or has gone to zero
* (In the case of the NIC rebooting).
* Please see Bug 33781 for a discussion of why things work this way.
*/
efx_update_diff_stat(&stats[EF10_STAT_port_rx_bad_bytes],
stats[EF10_STAT_port_rx_bytes_minus_good_bytes]);
efx_update_sw_stats(efx, stats);
return 0;
}
static size_t efx_ef10_update_stats_pf(struct efx_nic *efx, u64 *full_stats,
struct rtnl_link_stats64 *core_stats)
{
int retry;
/* If we're unlucky enough to read statistics during the DMA, wait
* up to 10ms for it to finish (typically takes <500us)
*/
for (retry = 0; retry < 100; ++retry) {
if (efx_ef10_try_update_nic_stats_pf(efx) == 0)
break;
udelay(100);
}
return efx_ef10_update_stats_common(efx, full_stats, core_stats);
}
@@ -3109,14 +3117,6 @@ static void efx_ef10_ev_test_generate(struct efx_channel *channel)
netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
}
void efx_ef10_handle_drain_event(struct efx_nic *efx)
{
if (atomic_dec_and_test(&efx->active_queues))
wake_up(&efx->flush_wq);
WARN_ON(atomic_read(&efx->active_queues) < 0);
}
static int efx_ef10_fini_dmaq(struct efx_nic *efx)
{
struct efx_tx_queue *tx_queue;
@@ -4023,7 +4023,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
.remove = efx_ef10_remove,
.dimension_resources = efx_ef10_dimension_resources,
.init = efx_ef10_init_nic,
.fini = efx_port_dummy_op_void,
.fini = efx_ef10_fini_nic,
.map_reset_reason = efx_ef10_map_reset_reason,
.map_reset_flags = efx_ef10_map_reset_flags,
.reset = efx_ef10_reset,
@@ -4132,7 +4132,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
.remove = efx_ef10_remove,
.dimension_resources = efx_ef10_dimension_resources,
.init = efx_ef10_init_nic,
.fini = efx_port_dummy_op_void,
.fini = efx_ef10_fini_nic,
.map_reset_reason = efx_ef10_map_reset_reason,
.map_reset_flags = efx_ef10_map_reset_flags,
.reset = efx_ef10_reset,
......
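The probe change above derives the channel limit from how many VIs fit in the BAR mapping, rather than folding EFX_TXQ_TYPES into the division. Purely illustrative arithmetic with made-up sizes (none of these numbers come from real hardware):

#include <stdio.h>

int main(void)
{
	unsigned int mem_map_size = 16 << 20;	/* say the mapped BAR is 16 MiB */
	unsigned int vi_stride = 8192;		/* say each VI window is 8 KiB */
	unsigned int txq_types = 4;		/* queues consumed per channel (assumed) */
	unsigned int channel_cap = 32;		/* assumed driver-wide channel cap */

	unsigned int max_vis = mem_map_size / vi_stride;	/* 2048 */
	unsigned int max_channels = max_vis / txq_types;	/* 512 */

	if (max_channels > channel_cap)
		max_channels = channel_cap;			/* capped to 32 */
	printf("max_vis=%u max_channels=%u\n", max_vis, max_channels);
	return 0;
}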
@@ -133,30 +133,6 @@ static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
*
**************************************************************************/
/* Equivalent to efx_link_set_advertising with all-zeroes, except does not
* force the Autoneg bit on.
*/
void efx_link_clear_advertising(struct efx_nic *efx)
{
bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS);
efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
}
void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
{
efx->wanted_fc = wanted_fc;
if (efx->link_advertising[0]) {
if (wanted_fc & EFX_FC_RX)
efx->link_advertising[0] |= (ADVERTISED_Pause |
ADVERTISED_Asym_Pause);
else
efx->link_advertising[0] &= ~(ADVERTISED_Pause |
ADVERTISED_Asym_Pause);
if (wanted_fc & EFX_FC_TX)
efx->link_advertising[0] ^= ADVERTISED_Asym_Pause;
}
}
static void efx_fini_port(struct efx_nic *efx);
static int efx_probe_port(struct efx_nic *efx)
@@ -1098,7 +1074,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
efx_pci_remove_main(efx);
efx_fini_io(efx, efx->type->mem_bar(efx));
efx_fini_io(efx);
netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n");
efx_fini_struct(efx);
@@ -1366,7 +1342,7 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
return 0;
fail3:
efx_fini_io(efx, efx->type->mem_bar(efx));
efx_fini_io(efx);
fail2:
efx_fini_struct(efx);
fail1:
@@ -1514,97 +1490,6 @@ static const struct dev_pm_ops efx_pm_ops = {
.restore = efx_pm_resume,
};
/* A PCI error affecting this device was detected.
* At this point MMIO and DMA may be disabled.
* Stop the software path and request a slot reset.
*/
static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
enum pci_channel_state state)
{
pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
struct efx_nic *efx = pci_get_drvdata(pdev);
if (state == pci_channel_io_perm_failure)
return PCI_ERS_RESULT_DISCONNECT;
rtnl_lock();
if (efx->state != STATE_DISABLED) {
efx->state = STATE_RECOVERY;
efx->reset_pending = 0;
efx_device_detach_sync(efx);
efx_stop_all(efx);
efx_disable_interrupts(efx);
status = PCI_ERS_RESULT_NEED_RESET;
} else {
/* If the interface is disabled we don't want to do anything
* with it.
*/
status = PCI_ERS_RESULT_RECOVERED;
}
rtnl_unlock();
pci_disable_device(pdev);
return status;
}
/* Fake a successful reset, which will be performed later in efx_io_resume. */
static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev)
{
struct efx_nic *efx = pci_get_drvdata(pdev);
pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
if (pci_enable_device(pdev)) {
netif_err(efx, hw, efx->net_dev,
"Cannot re-enable PCI device after reset.\n");
status = PCI_ERS_RESULT_DISCONNECT;
}
return status;
}
/* Perform the actual reset and resume I/O operations. */
static void efx_io_resume(struct pci_dev *pdev)
{
struct efx_nic *efx = pci_get_drvdata(pdev);
int rc;
rtnl_lock();
if (efx->state == STATE_DISABLED)
goto out;
rc = efx_reset(efx, RESET_TYPE_ALL);
if (rc) {
netif_err(efx, hw, efx->net_dev,
"efx_reset failed after PCI error (%d)\n", rc);
} else {
efx->state = STATE_READY;
netif_dbg(efx, hw, efx->net_dev,
"Done resetting and resuming IO after PCI error.\n");
}
out:
rtnl_unlock();
}
/* For simplicity and reliability, we always require a slot reset and try to
* reset the hardware when a pci error affecting the device is detected.
* We leave both the link_reset and mmio_enabled callback unimplemented:
* with our request for slot reset the mmio_enabled callback will never be
* called, and the link_reset callback is not used by AER or EEH mechanisms.
*/
static const struct pci_error_handlers efx_err_handlers = {
.error_detected = efx_io_error_detected,
.slot_reset = efx_io_slot_reset,
.resume = efx_io_resume,
};
static struct pci_driver efx_pci_driver = {
.name = KBUILD_MODNAME,
.id_table = efx_pci_table,
......
@@ -147,11 +147,6 @@ static inline s32 efx_filter_get_rx_ids(struct efx_nic *efx,
{
return efx->type->filter_get_rx_ids(efx, priority, buf, size);
}
#ifdef CONFIG_RFS_ACCEL
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota);
#endif
/* RSS contexts */
static inline bool efx_rss_active(struct efx_rss_context *ctx)
@@ -216,9 +211,6 @@ static inline void efx_schedule_channel_irq(struct efx_channel *channel)
efx_schedule_channel(channel);
}
void efx_link_clear_advertising(struct efx_nic *efx);
void efx_link_set_wanted_fc(struct efx_nic *efx, u8);
static inline void efx_device_detach_sync(struct efx_nic *efx)
{
struct net_device *dev = efx->net_dev;
......
@@ -175,6 +175,13 @@ static int efx_allocate_msix_channels(struct efx_nic *efx,
efx->n_xdp_channels = 0;
efx->xdp_tx_per_channel = 0;
efx->xdp_tx_queue_count = 0;
} else if (n_channels + n_xdp_tx > efx->max_vis) {
netif_err(efx, drv, efx->net_dev,
"Insufficient resources for %d XDP TX queues (%d other channels, max VIs %d)\n",
n_xdp_tx, n_channels, efx->max_vis);
efx->n_xdp_channels = 0;
efx->xdp_tx_per_channel = 0;
efx->xdp_tx_queue_count = 0;
} else {
efx->n_xdp_channels = n_xdp_ev;
efx->xdp_tx_per_channel = EFX_TXQ_TYPES;
......
@@ -383,6 +383,30 @@ static void efx_stop_datapath(struct efx_nic *efx)
*
**************************************************************************/
/* Equivalent to efx_link_set_advertising with all-zeroes, except does not
* force the Autoneg bit on.
*/
void efx_link_clear_advertising(struct efx_nic *efx)
{
bitmap_zero(efx->link_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS);
efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
}
void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
{
efx->wanted_fc = wanted_fc;
if (efx->link_advertising[0]) {
if (wanted_fc & EFX_FC_RX)
efx->link_advertising[0] |= (ADVERTISED_Pause |
ADVERTISED_Asym_Pause);
else
efx->link_advertising[0] &= ~(ADVERTISED_Pause |
ADVERTISED_Asym_Pause);
if (wanted_fc & EFX_FC_TX)
efx->link_advertising[0] ^= ADVERTISED_Asym_Pause;
}
}
static void efx_start_port(struct efx_nic *efx)
{
netif_dbg(efx, ifup, efx->net_dev, "start port\n");
@@ -929,6 +953,8 @@ int efx_init_struct(struct efx_nic *efx,
INIT_WORK(&efx->mac_work, efx_mac_work);
init_waitqueue_head(&efx->flush_wq);
efx->mem_bar = UINT_MAX;
rc = efx_init_channels(efx);
if (rc)
goto fail;
@@ -972,7 +998,9 @@ int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
struct pci_dev *pci_dev = efx->pci_dev;
int rc;
netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");
efx->mem_bar = UINT_MAX;
netif_dbg(efx, probe, efx->net_dev, "initialising I/O bar=%d\n", bar);
rc = pci_enable_device(pci_dev);
if (rc) {
@@ -1014,21 +1042,21 @@ int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
rc = pci_request_region(pci_dev, bar, "sfc");
if (rc) {
netif_err(efx, probe, efx->net_dev,
"request for memory BAR failed\n");
"request for memory BAR[%d] failed\n", bar);
rc = -EIO;
goto fail3;
}
efx->mem_bar = bar;
efx->membase = ioremap(efx->membase_phys, mem_map_size);
if (!efx->membase) {
netif_err(efx, probe, efx->net_dev,
"could not map memory BAR at %llx+%x\n",
"could not map memory BAR[%d] at %llx+%x\n", bar,
(unsigned long long)efx->membase_phys, mem_map_size);
rc = -ENOMEM;
goto fail4;
}
netif_dbg(efx, probe, efx->net_dev,
"memory BAR at %llx+%x (virtual %p)\n",
"memory BAR[%d] at %llx+%x (virtual %p)\n", bar,
(unsigned long long)efx->membase_phys, mem_map_size,
efx->membase);
@@ -1044,7 +1072,7 @@ int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
return rc;
}
void efx_fini_io(struct efx_nic *efx, int bar)
void efx_fini_io(struct efx_nic *efx)
{
netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");
@@ -1054,8 +1082,9 @@ void efx_fini_io(struct efx_nic *efx)
}
if (efx->membase_phys) {
pci_release_region(efx->pci_dev, bar);
pci_release_region(efx->pci_dev, efx->mem_bar);
efx->membase_phys = 0;
efx->mem_bar = UINT_MAX;
}
/* Don't disable bus-mastering if VFs are assigned */
@@ -1101,3 +1130,94 @@ void efx_fini_mcdi_logging(struct efx_nic *efx)
device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
}
#endif
/* A PCI error affecting this device was detected.
* At this point MMIO and DMA may be disabled.
* Stop the software path and request a slot reset.
*/
static pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
enum pci_channel_state state)
{
pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
struct efx_nic *efx = pci_get_drvdata(pdev);
if (state == pci_channel_io_perm_failure)
return PCI_ERS_RESULT_DISCONNECT;
rtnl_lock();
if (efx->state != STATE_DISABLED) {
efx->state = STATE_RECOVERY;
efx->reset_pending = 0;
efx_device_detach_sync(efx);
efx_stop_all(efx);
efx_disable_interrupts(efx);
status = PCI_ERS_RESULT_NEED_RESET;
} else {
/* If the interface is disabled we don't want to do anything
* with it.
*/
status = PCI_ERS_RESULT_RECOVERED;
}
rtnl_unlock();
pci_disable_device(pdev);
return status;
}
/* Fake a successful reset, which will be performed later in efx_io_resume. */
static pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev)
{
struct efx_nic *efx = pci_get_drvdata(pdev);
pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
if (pci_enable_device(pdev)) {
netif_err(efx, hw, efx->net_dev,
"Cannot re-enable PCI device after reset.\n");
status = PCI_ERS_RESULT_DISCONNECT;
}
return status;
}
/* Perform the actual reset and resume I/O operations. */
static void efx_io_resume(struct pci_dev *pdev)
{
struct efx_nic *efx = pci_get_drvdata(pdev);
int rc;
rtnl_lock();
if (efx->state == STATE_DISABLED)
goto out;
rc = efx_reset(efx, RESET_TYPE_ALL);
if (rc) {
netif_err(efx, hw, efx->net_dev,
"efx_reset failed after PCI error (%d)\n", rc);
} else {
efx->state = STATE_READY;
netif_dbg(efx, hw, efx->net_dev,
"Done resetting and resuming IO after PCI error.\n");
}
out:
rtnl_unlock();
}
/* For simplicity and reliability, we always require a slot reset and try to
* reset the hardware when a pci error affecting the device is detected.
* We leave both the link_reset and mmio_enabled callback unimplemented:
* with our request for slot reset the mmio_enabled callback will never be
* called, and the link_reset callback is not used by AER or EEH mechanisms.
*/
const struct pci_error_handlers efx_err_handlers = {
.error_detected = efx_io_error_detected,
.slot_reset = efx_io_slot_reset,
.resume = efx_io_resume,
};
@@ -13,11 +13,14 @@
int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
unsigned int mem_map_size);
void efx_fini_io(struct efx_nic *efx, int bar);
void efx_fini_io(struct efx_nic *efx);
int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev,
struct net_device *net_dev);
void efx_fini_struct(struct efx_nic *efx);
void efx_link_clear_advertising(struct efx_nic *efx);
void efx_link_set_wanted_fc(struct efx_nic *efx, u8);
void efx_start_all(struct efx_nic *efx);
void efx_stop_all(struct efx_nic *efx);
@@ -70,4 +73,5 @@ void efx_link_status_changed(struct efx_nic *efx);
unsigned int efx_xdp_max_mtu(struct efx_nic *efx);
int efx_change_mtu(struct net_device *net_dev, int new_mtu);
extern const struct pci_error_handlers efx_err_handlers;
#endif
@@ -15,8 +15,13 @@ void efx_ethtool_get_drvinfo(struct net_device *net_dev,
struct ethtool_drvinfo *info);
u32 efx_ethtool_get_msglevel(struct net_device *net_dev);
void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable);
void efx_ethtool_self_test(struct net_device *net_dev,
struct ethtool_test *test, u64 *data);
int efx_ethtool_nway_reset(struct net_device *net_dev);
void efx_ethtool_get_pauseparam(struct net_device *net_dev,
struct ethtool_pauseparam *pause);
int efx_ethtool_set_pauseparam(struct net_device *net_dev,
struct ethtool_pauseparam *pause);
int efx_ethtool_fill_self_tests(struct efx_nic *efx,
struct efx_self_tests *tests,
u8 *strings, u64 *data);
@@ -26,5 +31,34 @@ void efx_ethtool_get_strings(struct net_device *net_dev, u32 string_set,
void efx_ethtool_get_stats(struct net_device *net_dev,
struct ethtool_stats *stats __attribute__ ((unused)),
u64 *data);
int efx_ethtool_get_link_ksettings(struct net_device *net_dev,
struct ethtool_link_ksettings *out);
int efx_ethtool_set_link_ksettings(struct net_device *net_dev,
const struct ethtool_link_ksettings *settings);
int efx_ethtool_get_fecparam(struct net_device *net_dev,
struct ethtool_fecparam *fecparam);
int efx_ethtool_set_fecparam(struct net_device *net_dev,
struct ethtool_fecparam *fecparam);
int efx_ethtool_get_rxnfc(struct net_device *net_dev,
struct ethtool_rxnfc *info, u32 *rule_locs);
int efx_ethtool_set_rxnfc(struct net_device *net_dev,
struct ethtool_rxnfc *info);
u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev);
u32 efx_ethtool_get_rxfh_key_size(struct net_device *net_dev);
int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key,
u8 *hfunc);
int efx_ethtool_set_rxfh(struct net_device *net_dev,
const u32 *indir, const u8 *key, const u8 hfunc);
int efx_ethtool_get_rxfh_context(struct net_device *net_dev, u32 *indir,
u8 *key, u8 *hfunc, u32 rss_context);
int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
const u32 *indir, const u8 *key,
const u8 hfunc, u32 *rss_context,
bool delete);
int efx_ethtool_reset(struct net_device *net_dev, u32 *flags);
int efx_ethtool_get_module_eeprom(struct net_device *net_dev,
struct ethtool_eeprom *ee,
u8 *data);
int efx_ethtool_get_module_info(struct net_device *net_dev,
struct ethtool_modinfo *modinfo);
#endif
@@ -1299,6 +1299,14 @@ static void efx_mcdi_abandon(struct efx_nic *efx)
efx_schedule_reset(efx, RESET_TYPE_MCDI_TIMEOUT);
}
static void efx_handle_drain_event(struct efx_nic *efx)
{
if (atomic_dec_and_test(&efx->active_queues))
wake_up(&efx->flush_wq);
WARN_ON(atomic_read(&efx->active_queues) < 0);
}
/* Called from efx_farch_ev_process and efx_ef10_ev_process for MCDI events */
void efx_mcdi_process_event(struct efx_channel *channel,
efx_qword_t *event)
@@ -1371,7 +1379,7 @@ void efx_mcdi_process_event(struct efx_channel *channel,
BUILD_BUG_ON(MCDI_EVENT_TX_FLUSH_TO_DRIVER_LBN !=
MCDI_EVENT_RX_FLUSH_TO_DRIVER_LBN);
if (!MCDI_EVENT_FIELD(*event, TX_FLUSH_TO_DRIVER))
efx_ef10_handle_drain_event(efx);
efx_handle_drain_event(efx);
break;
case MCDI_EVENT_CODE_TX_ERR:
case MCDI_EVENT_CODE_RX_ERR:
......
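efx_handle_drain_event() above is a simple "last queue to finish flushing wakes the waiter" pattern on efx->active_queues and efx->flush_wq. A userspace sketch of the same idea with pthreads (illustrative only, not driver code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t all_drained = PTHREAD_COND_INITIALIZER;
static int active_queues = 4;

static void handle_drain_event(void)
{
	pthread_mutex_lock(&lock);
	if (--active_queues == 0)		/* last drain event ... */
		pthread_cond_signal(&all_drained);	/* ... wakes the waiter */
	pthread_mutex_unlock(&lock);
}

static void *queue_thread(void *arg)
{
	(void)arg;
	handle_drain_event();	/* pretend the NIC finished flushing this queue */
	return NULL;
}

int main(void)
{
	pthread_t threads[4];
	int i;

	for (i = 0; i < 4; i++)
		pthread_create(&threads[i], NULL, queue_thread, NULL);

	pthread_mutex_lock(&lock);
	while (active_queues)			/* like waiting on efx->flush_wq */
		pthread_cond_wait(&all_drained, &lock);
	pthread_mutex_unlock(&lock);
	printf("all queues drained\n");

	for (i = 0; i < 4; i++)
		pthread_join(threads[i], NULL);
	return 0;
}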
@@ -332,10 +332,9 @@ void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev);
#define MCDI_CAPABILITY_OFST(field) \
MC_CMD_GET_CAPABILITIES_V4_OUT_ ## field ## _OFST
/* field is FLAGS1 or FLAGS2 */
#define efx_has_cap(efx, flag, field) \
efx->type->check_caps(efx, \
MCDI_CAPABILITY(flag), \
MCDI_CAPABILITY_OFST(field))
#define efx_has_cap(efx, field) \
efx->type->check_caps(efx, \
MCDI_CAPABILITY(field), \
MCDI_CAPABILITY_OFST(field))
void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len);
......
@@ -828,7 +828,7 @@ static int efx_mcdi_filter_insert_def(struct efx_nic *efx,
efx_filter_set_uc_def(&spec);
if (encap_type) {
if (efx_has_cap(efx, VXLAN_NVGRE, FLAGS1))
if (efx_has_cap(efx, VXLAN_NVGRE))
efx_filter_set_encap_type(&spec, encap_type);
else
/*
@@ -1304,7 +1304,7 @@ int efx_mcdi_filter_table_probe(struct efx_nic *efx, bool multicast_chaining)
rc = efx_mcdi_filter_table_probe_matches(efx, table, false);
if (rc)
goto fail;
if (efx_has_cap(efx, VXLAN_NVGRE, FLAGS1))
if (efx_has_cap(efx, VXLAN_NVGRE))
rc = efx_mcdi_filter_table_probe_matches(efx, table, true);
if (rc)
goto fail;
@@ -1927,7 +1927,7 @@ static int efx_mcdi_filter_alloc_rss_context(struct efx_nic *efx, bool exclusive
return 0;
}
if (efx_has_cap(efx, RX_RSS_LIMITED, FLAGS1))
if (efx_has_cap(efx, RX_RSS_LIMITED))
return -EOPNOTSUPP;
MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_UPSTREAM_PORT_ID,
@@ -1948,7 +1948,7 @@ static int efx_mcdi_filter_alloc_rss_context(struct efx_nic *efx, bool exclusive
if (context_size)
*context_size = rss_spread;
if (efx_has_cap(efx, ADDITIONAL_RSS_MODES, FLAGS1))
if (efx_has_cap(efx, ADDITIONAL_RSS_MODES))
efx_mcdi_set_rss_context_flags(efx, ctx);
return 0;
......
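The efx_has_cap() callers above lose their FLAGS1/FLAGS2 argument because the new macro in mcdi.h pastes a single token into both the capability flag name and its offset name. A small userspace sketch of that token-pasting pattern, with hypothetical CAP_* constants standing in for the generated mcdi_pcol.h names:

#include <stdio.h>

/* Hypothetical generated constants, mimicking the mcdi_pcol.h style */
#define CAP_VXLAN_NVGRE_LBN  9
#define CAP_VXLAN_NVGRE_OFST 4

#define CAPABILITY(field)      CAP_ ## field ## _LBN
#define CAPABILITY_OFST(field) CAP_ ## field ## _OFST

/* One token drives both lookups, so callers only name the capability */
#define has_cap(field) \
	check_caps(CAPABILITY(field), CAPABILITY_OFST(field))

static int check_caps(unsigned int lbn, unsigned int ofst)
{
	printf("checking bit %u at offset %u\n", lbn, ofst);
	return 1;
}

int main(void)
{
	if (has_cap(VXLAN_NVGRE))	/* expands both names from one token */
		printf("capability present\n");
	return 0;
}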
@@ -961,6 +961,7 @@ struct efx_async_filter_insertion {
* @vpd_sn: Serial number read from VPD
* @xdp_rxq_info_failed: Have any of the rx queues failed to initialise their
* xdp_rxq_info structures?
* @mem_bar: The BAR that is mapped into membase.
* @monitor_work: Hardware monitor workitem
* @biu_lock: BIU (bus interface unit) lock
* @last_irq_cpu: Last CPU to handle a possible test interrupt. This
@@ -1022,6 +1023,7 @@ struct efx_nic {
unsigned next_buffer_table;
unsigned int max_channels;
unsigned int max_vis;
unsigned int max_tx_channels;
unsigned n_channels;
unsigned n_rx_channels;
@@ -1136,6 +1138,8 @@ struct efx_nic {
char *vpd_sn;
bool xdp_rxq_info_failed;
unsigned int mem_bar;
/* The following fields may be written more often */
struct delayed_work monitor_work ____cacheline_aligned_in_smp;
......
@@ -20,6 +20,8 @@
#include "farch_regs.h"
#include "io.h"
#include "workarounds.h"
#include "mcdi_port_common.h"
#include "mcdi_pcol.h"
/**************************************************************************
*
@@ -470,6 +472,49 @@ size_t efx_nic_describe_stats(const struct efx_hw_stat_desc *desc, size_t count,
return visible;
}
/**
* efx_nic_copy_stats - Copy stats from the DMA buffer in to an
* intermediate buffer. This is used to get a consistent
* set of stats while the DMA buffer can be written at any time
* by the NIC.
* @efx: The associated NIC.
* @dest: Destination buffer. Must be the same size as the DMA buffer.
*/
int efx_nic_copy_stats(struct efx_nic *efx, __le64 *dest)
{
__le64 *dma_stats = efx->stats_buffer.addr;
__le64 generation_start, generation_end;
int rc = 0, retry;
if (!dest)
return 0;
if (!dma_stats)
goto return_zeroes;
/* If we're unlucky enough to read statistics during the DMA, wait
* up to 10ms for it to finish (typically takes <500us)
*/
for (retry = 0; retry < 100; ++retry) {
generation_end = dma_stats[efx->num_mac_stats - 1];
if (generation_end == EFX_MC_STATS_GENERATION_INVALID)
goto return_zeroes;
rmb();
memcpy(dest, dma_stats, efx->num_mac_stats * sizeof(__le64));
rmb();
generation_start = dma_stats[MC_CMD_MAC_GENERATION_START];
if (generation_end == generation_start)
return 0; /* return good data */
udelay(100);
}
rc = -EIO;
return_zeroes:
memset(dest, 0, efx->num_mac_stats * sizeof(u64));
return rc;
}
/**
* efx_nic_update_stats - Convert statistics DMA buffer to array of u64
* @desc: Array of &struct efx_hw_stat_desc describing the DMA buffer
......
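efx_nic_copy_stats() above relies on the firmware bracketing each statistics DMA with a generation-start and a generation-end word: if both match around a copy, the snapshot is consistent. A simplified, single-threaded userspace sketch of that check (memory barriers and the real MC_CMD_MAC_* indices are omitted; the buffer layout here is made up):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define N_STATS   4
#define GEN_START 0			/* index of generation-start word */
#define GEN_END   (N_STATS - 1)		/* index of generation-end word */

static uint64_t dma_buf[N_STATS];	/* stands in for the stats DMA buffer */

static int copy_stats(uint64_t *dest)
{
	int retry;

	for (retry = 0; retry < 100; retry++) {
		uint64_t end = dma_buf[GEN_END];	/* read end marker first */

		memcpy(dest, dma_buf, sizeof(dma_buf));
		if (dma_buf[GEN_START] == end)
			return 0;	/* markers match: consistent snapshot */
		/* otherwise the "NIC" wrote mid-copy; try again */
	}
	return -1;
}

int main(void)
{
	uint64_t snapshot[N_STATS];

	/* Pretend the NIC published generation 7 with two counters */
	dma_buf[GEN_START] = 7;
	dma_buf[1] = 12345;
	dma_buf[2] = 678;
	dma_buf[GEN_END] = 7;

	if (copy_stats(snapshot) == 0)
		printf("stat1=%llu stat2=%llu (gen %llu)\n",
		       (unsigned long long)snapshot[1],
		       (unsigned long long)snapshot[2],
		       (unsigned long long)snapshot[GEN_START]);
	return 0;
}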
/* SPDX-License-Identifier: GPL-2.0-only */
/****************************************************************************
* Driver for Solarflare network controllers and boards
* Copyright 2005-2006 Fen Systems Ltd.
* Copyright 2006-2013 Solarflare Communications Inc.
* Copyright 2019-2020 Xilinx Inc.
*/
#ifndef EFX_NIC_COMMON_H
#define EFX_NIC_COMMON_H
#include "net_driver.h"
#include "efx_common.h"
#include "mcdi.h"
#include "ptp.h"
enum {
/* Revisions 0-2 were Falcon A0, A1 and B0 respectively.
* They are not supported by this driver but these revision numbers
* form part of the ethtool API for register dumping.
*/
EFX_REV_SIENA_A0 = 3,
EFX_REV_HUNT_A0 = 4,
};
static inline int efx_nic_rev(struct efx_nic *efx)
{
return efx->type->revision;
}
/* Read the current event from the event queue */
static inline efx_qword_t *efx_event(struct efx_channel *channel,
unsigned int index)
{
return ((efx_qword_t *) (channel->eventq.buf.addr)) +
(index & channel->eventq_mask);
}
/* See if an event is present
*
* We check both the high and low dword of the event for all ones. We
* wrote all ones when we cleared the event, and no valid event can
* have all ones in either its high or low dwords. This approach is
* robust against reordering.
*
* Note that using a single 64-bit comparison is incorrect; even
* though the CPU read will be atomic, the DMA write may not be.
*/
static inline int efx_event_present(efx_qword_t *event)
{
return !(EFX_DWORD_IS_ALL_ONES(event->dword[0]) |
EFX_DWORD_IS_ALL_ONES(event->dword[1]));
}
/* Returns a pointer to the specified transmit descriptor in the TX
* descriptor queue belonging to the specified channel.
*/
static inline efx_qword_t *
efx_tx_desc(struct efx_tx_queue *tx_queue, unsigned int index)
{
return ((efx_qword_t *) (tx_queue->txd.buf.addr)) + index;
}
/* Report whether this TX queue would be empty for the given write_count.
* May return false negative.
*/
static inline bool __efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue,
unsigned int write_count)
{
unsigned int empty_read_count = READ_ONCE(tx_queue->empty_read_count);
if (empty_read_count == 0)
return false;
return ((empty_read_count ^ write_count) & ~EFX_EMPTY_COUNT_VALID) == 0;
}
/* Report whether the NIC considers this TX queue empty, using
* packet_write_count (the write count recorded for the last completable
* doorbell push). May return false negative. EF10 only, which is OK
* because only EF10 supports PIO.
*/
static inline bool efx_nic_tx_is_empty(struct efx_tx_queue *tx_queue)
{
EFX_WARN_ON_ONCE_PARANOID(!tx_queue->efx->type->option_descriptors);
return __efx_nic_tx_is_empty(tx_queue, tx_queue->packet_write_count);
}
/* Get partner of a TX queue, seen as part of the same net core queue */
/* XXX is this a thing on EF100? */
static inline struct efx_tx_queue *efx_tx_queue_partner(struct efx_tx_queue *tx_queue)
{
if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD)
return tx_queue - EFX_TXQ_TYPE_OFFLOAD;
else
return tx_queue + EFX_TXQ_TYPE_OFFLOAD;
}
/* Decide whether we can use TX PIO, ie. write packet data directly into
* a buffer on the device. This can reduce latency at the expense of
* throughput, so we only do this if both hardware and software TX rings
* are empty. This also ensures that only one packet at a time can be
* using the PIO buffer.
*/
static inline bool efx_nic_may_tx_pio(struct efx_tx_queue *tx_queue)
{
struct efx_tx_queue *partner = efx_tx_queue_partner(tx_queue);
return tx_queue->piobuf && efx_nic_tx_is_empty(tx_queue) &&
efx_nic_tx_is_empty(partner);
}
/* Decide whether to push a TX descriptor to the NIC vs merely writing
* the doorbell. This can reduce latency when we are adding a single
* descriptor to an empty queue, but is otherwise pointless. Further,
* Falcon and Siena have hardware bugs (SF bug 33851) that may be
* triggered if we don't check this.
* We use the write_count used for the last doorbell push, to get the
* NIC's view of the tx queue.
*/
static inline bool efx_nic_may_push_tx_desc(struct efx_tx_queue *tx_queue,
unsigned int write_count)
{
bool was_empty = __efx_nic_tx_is_empty(tx_queue, write_count);
tx_queue->empty_read_count = 0;
return was_empty && tx_queue->write_count - write_count == 1;
}
/* Returns a pointer to the specified descriptor in the RX descriptor queue */
static inline efx_qword_t *
efx_rx_desc(struct efx_rx_queue *rx_queue, unsigned int index)
{
return ((efx_qword_t *) (rx_queue->rxd.buf.addr)) + index;
}
/* Alignment of PCIe DMA boundaries (4KB) */
#define EFX_PAGE_SIZE 4096
/* Size and alignment of buffer table entries (same) */
#define EFX_BUF_SIZE EFX_PAGE_SIZE
/* NIC-generic software stats */
enum {
GENERIC_STAT_rx_noskb_drops,
GENERIC_STAT_rx_nodesc_trunc,
GENERIC_STAT_COUNT
};
#define EFX_GENERIC_SW_STAT(ext_name) \
[GENERIC_STAT_ ## ext_name] = { #ext_name, 0, 0 }
/* TX data path */
static inline int efx_nic_probe_tx(struct efx_tx_queue *tx_queue)
{
return tx_queue->efx->type->tx_probe(tx_queue);
}
static inline void efx_nic_init_tx(struct efx_tx_queue *tx_queue)
{
tx_queue->efx->type->tx_init(tx_queue);
}
static inline void efx_nic_remove_tx(struct efx_tx_queue *tx_queue)
{
tx_queue->efx->type->tx_remove(tx_queue);
}
static inline void efx_nic_push_buffers(struct efx_tx_queue *tx_queue)
{
tx_queue->efx->type->tx_write(tx_queue);
}
/* RX data path */
static inline int efx_nic_probe_rx(struct efx_rx_queue *rx_queue)
{
return rx_queue->efx->type->rx_probe(rx_queue);
}
static inline void efx_nic_init_rx(struct efx_rx_queue *rx_queue)
{
rx_queue->efx->type->rx_init(rx_queue);
}
static inline void efx_nic_remove_rx(struct efx_rx_queue *rx_queue)
{
rx_queue->efx->type->rx_remove(rx_queue);
}
static inline void efx_nic_notify_rx_desc(struct efx_rx_queue *rx_queue)
{
rx_queue->efx->type->rx_write(rx_queue);
}
static inline void efx_nic_generate_fill_event(struct efx_rx_queue *rx_queue)
{
rx_queue->efx->type->rx_defer_refill(rx_queue);
}
/* Event data path */
static inline int efx_nic_probe_eventq(struct efx_channel *channel)
{
return channel->efx->type->ev_probe(channel);
}
static inline int efx_nic_init_eventq(struct efx_channel *channel)
{
return channel->efx->type->ev_init(channel);
}
static inline void efx_nic_fini_eventq(struct efx_channel *channel)
{
channel->efx->type->ev_fini(channel);
}
static inline void efx_nic_remove_eventq(struct efx_channel *channel)
{
channel->efx->type->ev_remove(channel);
}
static inline int
efx_nic_process_eventq(struct efx_channel *channel, int quota)
{
return channel->efx->type->ev_process(channel, quota);
}
static inline void efx_nic_eventq_read_ack(struct efx_channel *channel)
{
channel->efx->type->ev_read_ack(channel);
}
void efx_nic_event_test_start(struct efx_channel *channel);
bool efx_nic_event_present(struct efx_channel *channel);
/* Some statistics are computed as A - B where A and B each increase
* linearly with some hardware counter(s) and the counters are read
* asynchronously. If the counters contributing to B are always read
* after those contributing to A, the computed value may be lower than
* the true value by some variable amount, and may decrease between
* subsequent computations.
*
* We should never allow statistics to decrease or to exceed the true
* value. Since the computed value will never be greater than the
* true value, we can achieve this by only storing the computed value
* when it increases.
*/
static inline void efx_update_diff_stat(u64 *stat, u64 diff)
{
if ((s64)(diff - *stat) > 0)
*stat = diff;
}
/* Interrupts */
int efx_nic_init_interrupt(struct efx_nic *efx);
int efx_nic_irq_test_start(struct efx_nic *efx);
void efx_nic_fini_interrupt(struct efx_nic *efx);
static inline int efx_nic_event_test_irq_cpu(struct efx_channel *channel)
{
return READ_ONCE(channel->event_test_cpu);
}
static inline int efx_nic_irq_test_irq_cpu(struct efx_nic *efx)
{
return READ_ONCE(efx->last_irq_cpu);
}
/* Global Resources */
int efx_nic_alloc_buffer(struct efx_nic *efx, struct efx_buffer *buffer,
unsigned int len, gfp_t gfp_flags);
void efx_nic_free_buffer(struct efx_nic *efx, struct efx_buffer *buffer);
size_t efx_nic_get_regs_len(struct efx_nic *efx);
void efx_nic_get_regs(struct efx_nic *efx, void *buf);
size_t efx_nic_describe_stats(const struct efx_hw_stat_desc *desc, size_t count,
const unsigned long *mask, u8 *names);
int efx_nic_copy_stats(struct efx_nic *efx, __le64 *dest);
void efx_nic_update_stats(const struct efx_hw_stat_desc *desc, size_t count,
const unsigned long *mask, u64 *stats,
const void *dma_buf, bool accumulate);
void efx_nic_fix_nodesc_drop_stat(struct efx_nic *efx, u64 *stat);
#define EFX_MAX_FLUSH_TIME 5000
#endif /* EFX_NIC_COMMON_H */
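A short worked example of the efx_update_diff_stat() rule described above, with made-up readings; the signed comparison means a value that appears to move backwards is simply ignored:

#include <stdint.h>
#include <stdio.h>

static void update_diff_stat(uint64_t *stat, uint64_t diff)
{
	if ((int64_t)(diff - *stat) > 0)	/* only store increases */
		*stat = diff;
}

int main(void)
{
	uint64_t bad_bytes = 0;

	update_diff_stat(&bad_bytes, 1000);	/* first reading: stored */
	update_diff_stat(&bad_bytes, 950);	/* transient dip: ignored */
	update_diff_stat(&bad_bytes, 1200);	/* new maximum: stored */
	printf("%llu\n", (unsigned long long)bad_bytes);	/* prints 1200 */
	return 0;
}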
@@ -35,7 +35,6 @@
#include <linux/time.h>
#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/net_tstamp.h>
#include <linux/pps_kernel.h>
#include <linux/ptp_clock_kernel.h>
#include "net_driver.h"
@@ -44,7 +43,7 @@
#include "mcdi_pcol.h"
#include "io.h"
#include "farch_regs.h"
#include "nic.h"
#include "nic.h" /* indirectly includes ptp.h */
/* Maximum number of events expected to make up a PTP event */
#define MAX_EVENT_FRAGS 3
@@ -352,7 +351,7 @@ static int efx_phc_enable(struct ptp_clock_info *ptp,
bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx)
{
return efx_has_cap(efx, TX_MAC_TIMESTAMPING, FLAGS2);
return efx_has_cap(efx, TX_MAC_TIMESTAMPING);
}
/* PTP 'extra' channel is still a traffic channel, but we only create TX queues
......
/* SPDX-License-Identifier: GPL-2.0-only */
/****************************************************************************
* Driver for Solarflare network controllers and boards
* Copyright 2005-2006 Fen Systems Ltd.
* Copyright 2006-2013 Solarflare Communications Inc.
* Copyright 2019-2020 Xilinx Inc.
*/
#ifndef EFX_PTP_H
#define EFX_PTP_H
#include <linux/net_tstamp.h>
#include "net_driver.h"
struct ethtool_ts_info;
int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel);
void efx_ptp_defer_probe_with_channel(struct efx_nic *efx);
struct efx_channel *efx_ptp_channel(struct efx_nic *efx);
void efx_ptp_remove(struct efx_nic *efx);
int efx_ptp_set_ts_config(struct efx_nic *efx, struct ifreq *ifr);
int efx_ptp_get_ts_config(struct efx_nic *efx, struct ifreq *ifr);
void efx_ptp_get_ts_info(struct efx_nic *efx, struct ethtool_ts_info *ts_info);
bool efx_ptp_is_ptp_tx(struct efx_nic *efx, struct sk_buff *skb);
int efx_ptp_get_mode(struct efx_nic *efx);
int efx_ptp_change_mode(struct efx_nic *efx, bool enable_wanted,
unsigned int new_mode);
int efx_ptp_tx(struct efx_nic *efx, struct sk_buff *skb);
void efx_ptp_event(struct efx_nic *efx, efx_qword_t *ev);
size_t efx_ptp_describe_stats(struct efx_nic *efx, u8 *strings);
size_t efx_ptp_update_stats(struct efx_nic *efx, u64 *stats);
void efx_time_sync_event(struct efx_channel *channel, efx_qword_t *ev);
void __efx_rx_skb_attach_timestamp(struct efx_channel *channel,
struct sk_buff *skb);
static inline void efx_rx_skb_attach_timestamp(struct efx_channel *channel,
struct sk_buff *skb)
{
if (channel->sync_events_state == SYNC_EVENTS_VALID)
__efx_rx_skb_attach_timestamp(channel, skb);
}
void efx_ptp_start_datapath(struct efx_nic *efx);
void efx_ptp_stop_datapath(struct efx_nic *efx);
bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx);
ktime_t efx_ptp_nic_to_kernel_time(struct efx_tx_queue *tx_queue);
#endif /* EFX_PTP_H */
@@ -411,243 +411,9 @@ void __efx_rx_packet(struct efx_channel *channel)
rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;
if ((rx_buf->flags & EFX_RX_PKT_TCP) && !channel->type->receive_skb)
efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh);
efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh, 0);
else
efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
out:
channel->rx_pkt_n_frags = 0;
}
#ifdef CONFIG_RFS_ACCEL
static void efx_filter_rfs_work(struct work_struct *data)
{
struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
work);
struct efx_nic *efx = netdev_priv(req->net_dev);
struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
int slot_idx = req - efx->rps_slot;
struct efx_arfs_rule *rule;
u16 arfs_id = 0;
int rc;
rc = efx->type->filter_insert(efx, &req->spec, true);
if (rc >= 0)
/* Discard 'priority' part of EF10+ filter ID (mcdi_filters) */
rc %= efx->type->max_rx_ip_filters;
if (efx->rps_hash_table) {
spin_lock_bh(&efx->rps_hash_lock);
rule = efx_rps_hash_find(efx, &req->spec);
/* The rule might have already gone, if someone else's request
* for the same spec was already worked and then expired before
* we got around to our work. In that case we have nothing
* tying us to an arfs_id, meaning that as soon as the filter
* is considered for expiry it will be removed.
*/
if (rule) {
if (rc < 0)
rule->filter_id = EFX_ARFS_FILTER_ID_ERROR;
else
rule->filter_id = rc;
arfs_id = rule->arfs_id;
}
spin_unlock_bh(&efx->rps_hash_lock);
}
if (rc >= 0) {
/* Remember this so we can check whether to expire the filter
* later.
*/
mutex_lock(&efx->rps_mutex);
if (channel->rps_flow_id[rc] == RPS_FLOW_ID_INVALID)
channel->rfs_filter_count++;
channel->rps_flow_id[rc] = req->flow_id;
mutex_unlock(&efx->rps_mutex);
if (req->spec.ether_type == htons(ETH_P_IP))
netif_info(efx, rx_status, efx->net_dev,
"steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc, arfs_id);
else
netif_info(efx, rx_status, efx->net_dev,
"steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc, arfs_id);
channel->n_rfs_succeeded++;
} else {
if (req->spec.ether_type == htons(ETH_P_IP))
netif_dbg(efx, rx_status, efx->net_dev,
"failed to steer %s %pI4:%u:%pI4:%u to queue %u [flow %u rc %d id %u]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc, arfs_id);
else
netif_dbg(efx, rx_status, efx->net_dev,
"failed to steer %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u rc %d id %u]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc, arfs_id);
channel->n_rfs_failed++;
/* We're overloading the NIC's filter tables, so let's do a
* chunk of extra expiry work.
*/
__efx_filter_rfs_expire(channel, min(channel->rfs_filter_count,
100u));
}
/* Release references */
clear_bit(slot_idx, &efx->rps_slot_map);
dev_put(req->net_dev);
}
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id)
{
struct efx_nic *efx = netdev_priv(net_dev);
struct efx_async_filter_insertion *req;
struct efx_arfs_rule *rule;
struct flow_keys fk;
int slot_idx;
bool new;
int rc;
/* find a free slot */
for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++)
if (!test_and_set_bit(slot_idx, &efx->rps_slot_map))
break;
if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT)
return -EBUSY;
if (flow_id == RPS_FLOW_ID_INVALID) {
rc = -EINVAL;
goto out_clear;
}
if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) {
rc = -EPROTONOSUPPORT;
goto out_clear;
}
if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) {
rc = -EPROTONOSUPPORT;
goto out_clear;
}
if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) {
rc = -EPROTONOSUPPORT;
goto out_clear;
}
req = efx->rps_slot + slot_idx;
efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
rxq_index);
req->spec.match_flags =
EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
req->spec.ether_type = fk.basic.n_proto;
req->spec.ip_proto = fk.basic.ip_proto;
if (fk.basic.n_proto == htons(ETH_P_IP)) {
req->spec.rem_host[0] = fk.addrs.v4addrs.src;
req->spec.loc_host[0] = fk.addrs.v4addrs.dst;
} else {
memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src,
sizeof(struct in6_addr));
memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst,
sizeof(struct in6_addr));
}
req->spec.rem_port = fk.ports.src;
req->spec.loc_port = fk.ports.dst;
if (efx->rps_hash_table) {
/* Add it to ARFS hash table */
spin_lock(&efx->rps_hash_lock);
rule = efx_rps_hash_add(efx, &req->spec, &new);
if (!rule) {
rc = -ENOMEM;
goto out_unlock;
}
if (new)
rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER;
rc = rule->arfs_id;
/* Skip if existing or pending filter already does the right thing */
if (!new && rule->rxq_index == rxq_index &&
rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING)
goto out_unlock;
rule->rxq_index = rxq_index;
rule->filter_id = EFX_ARFS_FILTER_ID_PENDING;
spin_unlock(&efx->rps_hash_lock);
} else {
/* Without an ARFS hash table, we just use arfs_id 0 for all
* filters. This means if multiple flows hash to the same
* flow_id, all but the most recently touched will be eligible
* for expiry.
*/
rc = 0;
}
/* Queue the request */
dev_hold(req->net_dev = net_dev);
INIT_WORK(&req->work, efx_filter_rfs_work);
req->rxq_index = rxq_index;
req->flow_id = flow_id;
schedule_work(&req->work);
return rc;
out_unlock:
spin_unlock(&efx->rps_hash_lock);
out_clear:
clear_bit(slot_idx, &efx->rps_slot_map);
return rc;
}
bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota)
{
bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index);
struct efx_nic *efx = channel->efx;
unsigned int index, size, start;
u32 flow_id;
if (!mutex_trylock(&efx->rps_mutex))
return false;
expire_one = efx->type->filter_rfs_expire_one;
index = channel->rfs_expire_index;
start = index;
size = efx->type->max_rx_ip_filters;
while (quota) {
flow_id = channel->rps_flow_id[index];
if (flow_id != RPS_FLOW_ID_INVALID) {
quota--;
if (expire_one(efx, flow_id, index)) {
netif_info(efx, rx_status, efx->net_dev,
"expired filter %d [channel %u flow %u]\n",
index, channel->channel, flow_id);
channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID;
channel->rfs_filter_count--;
}
}
if (++index == size)
index = 0;
/* If we were called with a quota that exceeds the total number
* of filters in the table (which shouldn't happen, but could
* if two callers race), ensure that we don't loop forever -
* stop when we've examined every row of the table.
*/
if (index == start)
break;
}
channel->rfs_expire_index = index;
mutex_unlock(&efx->rps_mutex);
return true;
}
#endif /* CONFIG_RFS_ACCEL */
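Note: __efx_filter_rfs_expire() is written to be driven with a small quota at a time. The following is a minimal sketch of how a periodic caller could size that quota; it is illustrative only, and the filter_work / rfs_last_expiry channel members and the 30-second scan target are assumptions, not part of this diff.

/* Illustrative sketch only: a delayed-work caller that scales the expiry
 * quota by elapsed time so the whole filter table is walked roughly every
 * 30 seconds.  The 'filter_work' and 'rfs_last_expiry' members are
 * assumptions made for this example.
 */
static void example_rfs_expire_work(struct work_struct *data)
{
	struct delayed_work *dwork = to_delayed_work(data);
	struct efx_channel *channel =
		container_of(dwork, struct efx_channel, filter_work);
	unsigned int time = jiffies - channel->rfs_last_expiry;
	unsigned int quota = channel->rfs_filter_count * time / (30 * HZ);

	if (quota >= 20 &&
	    __efx_filter_rfs_expire(channel,
				    min(channel->rfs_filter_count, quota)))
		channel->rfs_last_expiry += time;
	/* Re-arm so expiry keeps happening even if the datapath is idle */
	schedule_delayed_work(dwork, 30 * HZ);
}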
...@@ -510,7 +510,7 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic) ...@@ -510,7 +510,7 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic)
*/ */
void void
efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
unsigned int n_frags, u8 *eh) unsigned int n_frags, u8 *eh, __wsum csum)
{ {
struct napi_struct *napi = &channel->napi_str; struct napi_struct *napi = &channel->napi_str;
struct efx_nic *efx = channel->efx; struct efx_nic *efx = channel->efx;
...@@ -528,8 +528,13 @@ efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, ...@@ -528,8 +528,13 @@ efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
if (efx->net_dev->features & NETIF_F_RXHASH) if (efx->net_dev->features & NETIF_F_RXHASH)
skb_set_hash(skb, efx_rx_buf_hash(efx, eh), skb_set_hash(skb, efx_rx_buf_hash(efx, eh),
PKT_HASH_TYPE_L3); PKT_HASH_TYPE_L3);
if (csum) {
skb->csum = csum;
skb->ip_summed = CHECKSUM_COMPLETE;
} else {
skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ? skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
CHECKSUM_UNNECESSARY : CHECKSUM_NONE); CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
}
skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL); skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
for (;;) { for (;;) {
...@@ -849,3 +854,237 @@ void efx_remove_filters(struct efx_nic *efx) ...@@ -849,3 +854,237 @@ void efx_remove_filters(struct efx_nic *efx)
efx->type->filter_table_remove(efx); efx->type->filter_table_remove(efx);
up_write(&efx->filter_sem); up_write(&efx->filter_sem);
} }
#ifdef CONFIG_RFS_ACCEL
static void efx_filter_rfs_work(struct work_struct *data)
{
struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
work);
struct efx_nic *efx = netdev_priv(req->net_dev);
struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
int slot_idx = req - efx->rps_slot;
struct efx_arfs_rule *rule;
u16 arfs_id = 0;
int rc;
rc = efx->type->filter_insert(efx, &req->spec, true);
if (rc >= 0)
/* Discard 'priority' part of EF10+ filter ID (mcdi_filters) */
rc %= efx->type->max_rx_ip_filters;
if (efx->rps_hash_table) {
spin_lock_bh(&efx->rps_hash_lock);
rule = efx_rps_hash_find(efx, &req->spec);
/* The rule might have already gone, if someone else's request
* for the same spec was already worked and then expired before
* we got around to our work. In that case we have nothing
* tying us to an arfs_id, meaning that as soon as the filter
* is considered for expiry it will be removed.
*/
if (rule) {
if (rc < 0)
rule->filter_id = EFX_ARFS_FILTER_ID_ERROR;
else
rule->filter_id = rc;
arfs_id = rule->arfs_id;
}
spin_unlock_bh(&efx->rps_hash_lock);
}
if (rc >= 0) {
/* Remember this so we can check whether to expire the filter
* later.
*/
mutex_lock(&efx->rps_mutex);
if (channel->rps_flow_id[rc] == RPS_FLOW_ID_INVALID)
channel->rfs_filter_count++;
channel->rps_flow_id[rc] = req->flow_id;
mutex_unlock(&efx->rps_mutex);
if (req->spec.ether_type == htons(ETH_P_IP))
netif_info(efx, rx_status, efx->net_dev,
"steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc, arfs_id);
else
netif_info(efx, rx_status, efx->net_dev,
"steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc, arfs_id);
channel->n_rfs_succeeded++;
} else {
if (req->spec.ether_type == htons(ETH_P_IP))
netif_dbg(efx, rx_status, efx->net_dev,
"failed to steer %s %pI4:%u:%pI4:%u to queue %u [flow %u rc %d id %u]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc, arfs_id);
else
netif_dbg(efx, rx_status, efx->net_dev,
"failed to steer %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u rc %d id %u]\n",
(req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
req->spec.rem_host, ntohs(req->spec.rem_port),
req->spec.loc_host, ntohs(req->spec.loc_port),
req->rxq_index, req->flow_id, rc, arfs_id);
channel->n_rfs_failed++;
/* We're overloading the NIC's filter tables, so let's do a
* chunk of extra expiry work.
*/
__efx_filter_rfs_expire(channel, min(channel->rfs_filter_count,
100u));
}
/* Release references */
clear_bit(slot_idx, &efx->rps_slot_map);
dev_put(req->net_dev);
}
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id)
{
struct efx_nic *efx = netdev_priv(net_dev);
struct efx_async_filter_insertion *req;
struct efx_arfs_rule *rule;
struct flow_keys fk;
int slot_idx;
bool new;
int rc;
/* find a free slot */
for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++)
if (!test_and_set_bit(slot_idx, &efx->rps_slot_map))
break;
if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT)
return -EBUSY;
if (flow_id == RPS_FLOW_ID_INVALID) {
rc = -EINVAL;
goto out_clear;
}
if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) {
rc = -EPROTONOSUPPORT;
goto out_clear;
}
if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) {
rc = -EPROTONOSUPPORT;
goto out_clear;
}
if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) {
rc = -EPROTONOSUPPORT;
goto out_clear;
}
req = efx->rps_slot + slot_idx;
efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
rxq_index);
req->spec.match_flags =
EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
req->spec.ether_type = fk.basic.n_proto;
req->spec.ip_proto = fk.basic.ip_proto;
if (fk.basic.n_proto == htons(ETH_P_IP)) {
req->spec.rem_host[0] = fk.addrs.v4addrs.src;
req->spec.loc_host[0] = fk.addrs.v4addrs.dst;
} else {
memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src,
sizeof(struct in6_addr));
memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst,
sizeof(struct in6_addr));
}
req->spec.rem_port = fk.ports.src;
req->spec.loc_port = fk.ports.dst;
if (efx->rps_hash_table) {
/* Add it to ARFS hash table */
spin_lock(&efx->rps_hash_lock);
rule = efx_rps_hash_add(efx, &req->spec, &new);
if (!rule) {
rc = -ENOMEM;
goto out_unlock;
}
if (new)
rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER;
rc = rule->arfs_id;
/* Skip if existing or pending filter already does the right thing */
if (!new && rule->rxq_index == rxq_index &&
rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING)
goto out_unlock;
rule->rxq_index = rxq_index;
rule->filter_id = EFX_ARFS_FILTER_ID_PENDING;
spin_unlock(&efx->rps_hash_lock);
} else {
/* Without an ARFS hash table, we just use arfs_id 0 for all
* filters. This means if multiple flows hash to the same
* flow_id, all but the most recently touched will be eligible
* for expiry.
*/
rc = 0;
}
/* Queue the request */
dev_hold(req->net_dev = net_dev);
INIT_WORK(&req->work, efx_filter_rfs_work);
req->rxq_index = rxq_index;
req->flow_id = flow_id;
schedule_work(&req->work);
return rc;
out_unlock:
spin_unlock(&efx->rps_hash_lock);
out_clear:
clear_bit(slot_idx, &efx->rps_slot_map);
return rc;
}
bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota)
{
bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index);
struct efx_nic *efx = channel->efx;
unsigned int index, size, start;
u32 flow_id;
if (!mutex_trylock(&efx->rps_mutex))
return false;
expire_one = efx->type->filter_rfs_expire_one;
index = channel->rfs_expire_index;
start = index;
size = efx->type->max_rx_ip_filters;
while (quota) {
flow_id = channel->rps_flow_id[index];
if (flow_id != RPS_FLOW_ID_INVALID) {
quota--;
if (expire_one(efx, flow_id, index)) {
netif_info(efx, rx_status, efx->net_dev,
"expired filter %d [channel %u flow %u]\n",
index, channel->channel, flow_id);
channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID;
channel->rfs_filter_count--;
}
}
if (++index == size)
index = 0;
/* If we were called with a quota that exceeds the total number
* of filters in the table (which shouldn't happen, but could
* if two callers race), ensure that we don't loop forever -
* stop when we've examined every row of the table.
*/
if (index == start)
break;
}
channel->rfs_expire_index = index;
mutex_unlock(&efx->rps_mutex);
return true;
}
#endif /* CONFIG_RFS_ACCEL */
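Note: efx_filter_rfs() above hands each request to a worker through a small fixed pool of efx_async_filter_insertion slots, claimed lock-free with test_and_set_bit() and released with clear_bit() once the work item finishes. Below is a standalone illustration of that pattern; all example_* names and the slot count are invented for the sketch.

/* Standalone illustration of the slot-claim pattern used by efx_filter_rfs():
 * an unsigned long bitmap plus test_and_set_bit() gives a lock-free pool of
 * in-flight request slots; clear_bit() releases a slot when the work
 * completes.  Needs <linux/bitops.h> and <linux/errno.h>.
 */
#define EXAMPLE_MAX_IN_FLIGHT	8

static unsigned long example_slot_map;

static int example_claim_slot(void)
{
	int idx;

	for (idx = 0; idx < EXAMPLE_MAX_IN_FLIGHT; idx++)
		if (!test_and_set_bit(idx, &example_slot_map))
			return idx;	/* slot idx is now owned by the caller */
	return -EBUSY;			/* all slots busy; caller backs off */
}

static void example_release_slot(int idx)
{
	clear_bit(idx, &example_slot_map);
}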
...@@ -67,7 +67,7 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic); ...@@ -67,7 +67,7 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic);
void void
efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
unsigned int n_frags, u8 *eh); unsigned int n_frags, u8 *eh, __wsum csum);
struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx); struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx);
struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id); struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id);
...@@ -89,6 +89,10 @@ struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx, ...@@ -89,6 +89,10 @@ struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
const struct efx_filter_spec *spec, const struct efx_filter_spec *spec,
bool *new); bool *new);
void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec); void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec);
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
u16 rxq_index, u32 flow_id);
bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota);
#endif #endif
int efx_probe_filters(struct efx_nic *efx); int efx_probe_filters(struct efx_nic *efx);
......
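Note: exporting these prototypes lets netdev-facing code hook ARFS up directly, since efx_filter_rfs() has the .ndo_rx_flow_steer signature (struct net_device *, const struct sk_buff *, u16, u32). A sketch of that wiring is below; the ops-table name and the elided entries are illustrative.

static const struct net_device_ops example_netdev_ops = {
	/* ... the usual .ndo_open / .ndo_stop / .ndo_start_xmit entries ... */
#ifdef CONFIG_RFS_ACCEL
	/* efx_filter_rfs() matches the ndo_rx_flow_steer prototype, so it
	 * can be referenced directly when RFS acceleration is enabled.
	 */
	.ndo_rx_flow_steer	= efx_filter_rfs,
#endif
};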
...@@ -276,6 +276,7 @@ static int siena_probe_nic(struct efx_nic *efx) ...@@ -276,6 +276,7 @@ static int siena_probe_nic(struct efx_nic *efx)
} }
efx->max_channels = EFX_MAX_CHANNELS; efx->max_channels = EFX_MAX_CHANNELS;
efx->max_vis = EFX_MAX_CHANNELS;
efx->max_tx_channels = EFX_MAX_CHANNELS; efx->max_tx_channels = EFX_MAX_CHANNELS;
efx_reado(efx, &reg, FR_AZ_CS_DEBUG); efx_reado(efx, &reg, FR_AZ_CS_DEBUG);
......
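Note: giving Siena a max_vis value alongside max_channels lets shared code bound resources by virtual interfaces as well as by channels. The clamp below is only a hedged sketch of how common code might use it; the helper name is invented and the exact policy is an assumption.

/* Hypothetical clamp in common channel-allocation code: never request more
 * channels than either the channel limit or the VI limit allows.
 */
static unsigned int example_clamp_channels(struct efx_nic *efx,
					   unsigned int wanted)
{
	wanted = min(wanted, efx->max_channels);
	return min(wanted, efx->max_vis);
}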