Commit 739bb23d authored by Ben Hutchings, committed by Jeff Garzik

sfc: Do not reset when hardware monitor detects a fault

The TX watchdog should trigger a reset, but a temperature/power alarm
should not, as a reset is unlikely to solve the problem.
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
parent 3e133c44
...@@ -77,11 +77,6 @@ static int napi_weight = 64; ...@@ -77,11 +77,6 @@ static int napi_weight = 64;
*/ */
unsigned int efx_monitor_interval = 1 * HZ; unsigned int efx_monitor_interval = 1 * HZ;
/* This controls whether or not the hardware monitor will trigger a
* reset when it detects an error condition.
*/
static unsigned int monitor_reset = true;
/* This controls whether or not the driver will initialise devices /* This controls whether or not the driver will initialise devices
* with invalid MAC addresses stored in the EEPROM or flash. If true, * with invalid MAC addresses stored in the EEPROM or flash. If true,
* such devices will be initialised with a random locally-generated * such devices will be initialised with a random locally-generated
...@@ -1176,17 +1171,6 @@ static void efx_monitor(struct work_struct *data) ...@@ -1176,17 +1171,6 @@ static void efx_monitor(struct work_struct *data)
rc = falcon_check_xmac(efx); rc = falcon_check_xmac(efx);
mutex_unlock(&efx->mac_lock); mutex_unlock(&efx->mac_lock);
if (rc) {
if (monitor_reset) {
EFX_ERR(efx, "hardware monitor detected a fault: "
"triggering reset\n");
efx_schedule_reset(efx, RESET_TYPE_MONITOR);
} else {
EFX_ERR(efx, "hardware monitor detected a fault, "
"skipping reset\n");
}
}
queue_delayed_work(efx->workqueue, &efx->monitor_work, queue_delayed_work(efx->workqueue, &efx->monitor_work,
efx_monitor_interval); efx_monitor_interval);
} }
...@@ -1358,12 +1342,11 @@ static void efx_watchdog(struct net_device *net_dev) ...@@ -1358,12 +1342,11 @@ static void efx_watchdog(struct net_device *net_dev)
{ {
struct efx_nic *efx = netdev_priv(net_dev); struct efx_nic *efx = netdev_priv(net_dev);
EFX_ERR(efx, "TX stuck with stop_count=%d port_enabled=%d: %s\n", EFX_ERR(efx, "TX stuck with stop_count=%d port_enabled=%d:"
atomic_read(&efx->netif_stop_count), efx->port_enabled, " resetting channels\n",
monitor_reset ? "resetting channels" : "skipping reset"); atomic_read(&efx->netif_stop_count), efx->port_enabled);
if (monitor_reset) efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
efx_schedule_reset(efx, RESET_TYPE_MONITOR);
} }
......
...@@ -72,7 +72,7 @@ extern const char *efx_loopback_mode_names[]; ...@@ -72,7 +72,7 @@ extern const char *efx_loopback_mode_names[];
* @RESET_TYPE_ALL: reset everything but PCI core blocks * @RESET_TYPE_ALL: reset everything but PCI core blocks
* @RESET_TYPE_WORLD: reset everything, save & restore PCI config * @RESET_TYPE_WORLD: reset everything, save & restore PCI config
* @RESET_TYPE_DISABLE: disable NIC * @RESET_TYPE_DISABLE: disable NIC
* @RESET_TYPE_MONITOR: reset due to hardware monitor * @RESET_TYPE_TX_WATCHDOG: reset due to TX watchdog
* @RESET_TYPE_INT_ERROR: reset due to internal error * @RESET_TYPE_INT_ERROR: reset due to internal error
* @RESET_TYPE_RX_RECOVERY: reset to recover from RX datapath errors * @RESET_TYPE_RX_RECOVERY: reset to recover from RX datapath errors
* @RESET_TYPE_RX_DESC_FETCH: pcie error during rx descriptor fetch * @RESET_TYPE_RX_DESC_FETCH: pcie error during rx descriptor fetch
...@@ -86,7 +86,7 @@ enum reset_type { ...@@ -86,7 +86,7 @@ enum reset_type {
RESET_TYPE_WORLD = 2, RESET_TYPE_WORLD = 2,
RESET_TYPE_DISABLE = 3, RESET_TYPE_DISABLE = 3,
RESET_TYPE_MAX_METHOD, RESET_TYPE_MAX_METHOD,
RESET_TYPE_MONITOR, RESET_TYPE_TX_WATCHDOG,
RESET_TYPE_INT_ERROR, RESET_TYPE_INT_ERROR,
RESET_TYPE_RX_RECOVERY, RESET_TYPE_RX_RECOVERY,
RESET_TYPE_RX_DESC_FETCH, RESET_TYPE_RX_DESC_FETCH,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment