Commit 62a261f6 authored by David Arinzon, committed by Jakub Kicinski

net: ena: Add a counter for driver's reset failures

This patch adds a counter to the ena_adapter struct in
order to keep track of reset failures.
The counter is incremented every time either ena_restore_device()
or ena_destroy_device() fails.
Signed-off-by: Osama Abboud <osamaabb@amazon.com>
Signed-off-by: David Arinzon <darinzon@amazon.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://lore.kernel.org/r/20240512134637.25299-2-darinzon@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 5fcc17df
...@@ -49,6 +49,7 @@ static const struct ena_stats ena_stats_global_strings[] = { ...@@ -49,6 +49,7 @@ static const struct ena_stats ena_stats_global_strings[] = {
ENA_STAT_GLOBAL_ENTRY(interface_up), ENA_STAT_GLOBAL_ENTRY(interface_up),
ENA_STAT_GLOBAL_ENTRY(interface_down), ENA_STAT_GLOBAL_ENTRY(interface_down),
ENA_STAT_GLOBAL_ENTRY(admin_q_pause), ENA_STAT_GLOBAL_ENTRY(admin_q_pause),
ENA_STAT_GLOBAL_ENTRY(reset_fail),
}; };
static const struct ena_stats ena_stats_eni_strings[] = { static const struct ena_stats ena_stats_eni_strings[] = {
......
...@@ -42,7 +42,7 @@ MODULE_DEVICE_TABLE(pci, ena_pci_tbl); ...@@ -42,7 +42,7 @@ MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
static int ena_rss_init_default(struct ena_adapter *adapter); static int ena_rss_init_default(struct ena_adapter *adapter);
static void check_for_admin_com_state(struct ena_adapter *adapter); static void check_for_admin_com_state(struct ena_adapter *adapter);
static void ena_destroy_device(struct ena_adapter *adapter, bool graceful); static int ena_destroy_device(struct ena_adapter *adapter, bool graceful);
static int ena_restore_device(struct ena_adapter *adapter); static int ena_restore_device(struct ena_adapter *adapter);
static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue) static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
...@@ -3235,14 +3235,15 @@ static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter) ...@@ -3235,14 +3235,15 @@ static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
return rc; return rc;
} }
static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) static int ena_destroy_device(struct ena_adapter *adapter, bool graceful)
{ {
struct net_device *netdev = adapter->netdev; struct net_device *netdev = adapter->netdev;
struct ena_com_dev *ena_dev = adapter->ena_dev; struct ena_com_dev *ena_dev = adapter->ena_dev;
bool dev_up; bool dev_up;
int rc = 0;
if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)) if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
return; return 0;
netif_carrier_off(netdev); netif_carrier_off(netdev);
...@@ -3260,7 +3261,7 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) ...@@ -3260,7 +3261,7 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
* and device is up, ena_down() already reset the device. * and device is up, ena_down() already reset the device.
*/ */
if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up)) if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason); rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
ena_free_mgmnt_irq(adapter); ena_free_mgmnt_irq(adapter);
...@@ -3279,6 +3280,8 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful) ...@@ -3279,6 +3280,8 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
return rc;
} }
static int ena_restore_device(struct ena_adapter *adapter) static int ena_restore_device(struct ena_adapter *adapter)
...@@ -3355,14 +3358,17 @@ static int ena_restore_device(struct ena_adapter *adapter) ...@@ -3355,14 +3358,17 @@ static int ena_restore_device(struct ena_adapter *adapter)
static void ena_fw_reset_device(struct work_struct *work) static void ena_fw_reset_device(struct work_struct *work)
{ {
int rc = 0;
struct ena_adapter *adapter = struct ena_adapter *adapter =
container_of(work, struct ena_adapter, reset_task); container_of(work, struct ena_adapter, reset_task);
rtnl_lock(); rtnl_lock();
if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
ena_destroy_device(adapter, false); rc |= ena_destroy_device(adapter, false);
ena_restore_device(adapter); rc |= ena_restore_device(adapter);
adapter->dev_stats.reset_fail += !!rc;
dev_err(&adapter->pdev->dev, "Device reset completed successfully\n"); dev_err(&adapter->pdev->dev, "Device reset completed successfully\n");
} }
......
...@@ -290,6 +290,7 @@ struct ena_stats_dev { ...@@ -290,6 +290,7 @@ struct ena_stats_dev {
u64 admin_q_pause; u64 admin_q_pause;
u64 rx_drops; u64 rx_drops;
u64 tx_drops; u64 tx_drops;
u64 reset_fail;
}; };
enum ena_flags_t { enum ena_flags_t {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment