Commit a7d529ae authored by Ben Hutchings

sfc: Allow resets to be upgraded; use atomic ops for safety

Currently an attempt to schedule any reset is ignored if a reset
is already pending.  This ignores the relative scopes - if the
requested reset is greater in scope, then the scheduled reset should
be upgraded accordingly.

There are also some race conditions which could lead to a reset
request being lost.  Deal with them by using atomic operations on a
bitmask.  This also makes tests on reset_pending easier to get right.
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
parent 4017dbdc
...@@ -229,8 +229,7 @@ static int efx_process_channel(struct efx_channel *channel, int budget) ...@@ -229,8 +229,7 @@ static int efx_process_channel(struct efx_channel *channel, int budget)
struct efx_nic *efx = channel->efx; struct efx_nic *efx = channel->efx;
int spent; int spent;
if (unlikely(efx->reset_pending != RESET_TYPE_NONE || if (unlikely(efx->reset_pending || !channel->enabled))
!channel->enabled))
return 0; return 0;
spent = efx_nic_process_eventq(channel, budget); spent = efx_nic_process_eventq(channel, budget);
...@@ -1461,7 +1460,7 @@ static void efx_start_all(struct efx_nic *efx) ...@@ -1461,7 +1460,7 @@ static void efx_start_all(struct efx_nic *efx)
* reset_pending [modified from an atomic context], we instead guarantee * reset_pending [modified from an atomic context], we instead guarantee
* that efx_mcdi_mode_poll() isn't reverted erroneously */ * that efx_mcdi_mode_poll() isn't reverted erroneously */
efx_mcdi_mode_event(efx); efx_mcdi_mode_event(efx);
if (efx->reset_pending != RESET_TYPE_NONE) if (efx->reset_pending)
efx_mcdi_mode_poll(efx); efx_mcdi_mode_poll(efx);
/* Start the hardware monitor if there is one. Otherwise (we're link /* Start the hardware monitor if there is one. Otherwise (we're link
...@@ -2118,8 +2117,10 @@ int efx_reset(struct efx_nic *efx, enum reset_type method) ...@@ -2118,8 +2117,10 @@ int efx_reset(struct efx_nic *efx, enum reset_type method)
goto out; goto out;
} }
/* Allow resets to be rescheduled. */ /* Clear flags for the scopes we covered. We assume the NIC and
efx->reset_pending = RESET_TYPE_NONE; * driver are now quiescent so that there is no race here.
*/
efx->reset_pending &= -(1 << (method + 1));
/* Reinitialise bus-mastering, which may have been turned off before /* Reinitialise bus-mastering, which may have been turned off before
* the reset was scheduled. This is still appropriate, even in the * the reset was scheduled. This is still appropriate, even in the
...@@ -2154,12 +2155,13 @@ int efx_reset(struct efx_nic *efx, enum reset_type method) ...@@ -2154,12 +2155,13 @@ int efx_reset(struct efx_nic *efx, enum reset_type method)
static void efx_reset_work(struct work_struct *data) static void efx_reset_work(struct work_struct *data)
{ {
struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
unsigned long pending = ACCESS_ONCE(efx->reset_pending);
if (efx->reset_pending == RESET_TYPE_NONE) if (!pending)
return; return;
/* If we're not RUNNING then don't reset. Leave the reset_pending /* If we're not RUNNING then don't reset. Leave the reset_pending
* flag set so that efx_pci_probe_main will be retried */ * flags set so that efx_pci_probe_main will be retried */
if (efx->state != STATE_RUNNING) { if (efx->state != STATE_RUNNING) {
netif_info(efx, drv, efx->net_dev, netif_info(efx, drv, efx->net_dev,
"scheduled reset quenched. NIC not RUNNING\n"); "scheduled reset quenched. NIC not RUNNING\n");
...@@ -2167,7 +2169,7 @@ static void efx_reset_work(struct work_struct *data) ...@@ -2167,7 +2169,7 @@ static void efx_reset_work(struct work_struct *data)
} }
rtnl_lock(); rtnl_lock();
(void)efx_reset(efx, efx->reset_pending); (void)efx_reset(efx, fls(pending) - 1);
rtnl_unlock(); rtnl_unlock();
} }
...@@ -2175,12 +2177,6 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) ...@@ -2175,12 +2177,6 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
{ {
enum reset_type method; enum reset_type method;
if (efx->reset_pending != RESET_TYPE_NONE) {
netif_info(efx, drv, efx->net_dev,
"quenching already scheduled reset\n");
return;
}
switch (type) { switch (type) {
case RESET_TYPE_INVISIBLE: case RESET_TYPE_INVISIBLE:
case RESET_TYPE_ALL: case RESET_TYPE_ALL:
...@@ -2208,7 +2204,7 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) ...@@ -2208,7 +2204,7 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
RESET_TYPE(method)); RESET_TYPE(method));
efx->reset_pending = method; set_bit(method, &efx->reset_pending);
/* efx_process_channel() will no longer read events once a /* efx_process_channel() will no longer read events once a
* reset is scheduled. So switch back to poll'd MCDI completions. */ * reset is scheduled. So switch back to poll'd MCDI completions. */
...@@ -2288,7 +2284,6 @@ static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type, ...@@ -2288,7 +2284,6 @@ static int efx_init_struct(struct efx_nic *efx, const struct efx_nic_type *type,
efx->pci_dev = pci_dev; efx->pci_dev = pci_dev;
efx->msg_enable = debug; efx->msg_enable = debug;
efx->state = STATE_INIT; efx->state = STATE_INIT;
efx->reset_pending = RESET_TYPE_NONE;
strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));
efx->net_dev = net_dev; efx->net_dev = net_dev;
...@@ -2510,7 +2505,7 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev, ...@@ -2510,7 +2505,7 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
cancel_work_sync(&efx->reset_work); cancel_work_sync(&efx->reset_work);
if (rc == 0) { if (rc == 0) {
if (efx->reset_pending != RESET_TYPE_NONE) { if (efx->reset_pending) {
/* If there was a scheduled reset during /* If there was a scheduled reset during
* probe, the NIC is probably hosed anyway */ * probe, the NIC is probably hosed anyway */
efx_pci_remove_main(efx); efx_pci_remove_main(efx);
...@@ -2521,11 +2516,12 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev, ...@@ -2521,11 +2516,12 @@ static int __devinit efx_pci_probe(struct pci_dev *pci_dev,
} }
/* Retry if a recoverable reset event has been scheduled */ /* Retry if a recoverable reset event has been scheduled */
if ((efx->reset_pending != RESET_TYPE_INVISIBLE) && if (efx->reset_pending &
(efx->reset_pending != RESET_TYPE_ALL)) ~(1 << RESET_TYPE_INVISIBLE | 1 << RESET_TYPE_ALL) ||
!efx->reset_pending)
goto fail3; goto fail3;
efx->reset_pending = RESET_TYPE_NONE; efx->reset_pending = 0;
} }
if (rc) { if (rc) {
...@@ -2609,7 +2605,7 @@ static int efx_pm_poweroff(struct device *dev) ...@@ -2609,7 +2605,7 @@ static int efx_pm_poweroff(struct device *dev)
efx->type->fini(efx); efx->type->fini(efx);
efx->reset_pending = RESET_TYPE_NONE; efx->reset_pending = 0;
pci_save_state(pci_dev); pci_save_state(pci_dev);
return pci_set_power_state(pci_dev, PCI_D3hot); return pci_set_power_state(pci_dev, PCI_D3hot);
......
...@@ -134,6 +134,8 @@ enum efx_loopback_mode { ...@@ -134,6 +134,8 @@ enum efx_loopback_mode {
* other values specify reasons, which efx_schedule_reset() will choose * other values specify reasons, which efx_schedule_reset() will choose
* a method for. * a method for.
* *
* Reset methods are numbered in order of increasing scope.
*
* @RESET_TYPE_INVISIBLE: don't reset the PHYs or interrupts * @RESET_TYPE_INVISIBLE: don't reset the PHYs or interrupts
* @RESET_TYPE_ALL: reset everything but PCI core blocks * @RESET_TYPE_ALL: reset everything but PCI core blocks
* @RESET_TYPE_WORLD: reset everything, save & restore PCI config * @RESET_TYPE_WORLD: reset everything, save & restore PCI config
...@@ -147,7 +149,6 @@ enum efx_loopback_mode { ...@@ -147,7 +149,6 @@ enum efx_loopback_mode {
* @RESET_TYPE_MC_FAILURE: MC reboot/assertion * @RESET_TYPE_MC_FAILURE: MC reboot/assertion
*/ */
enum reset_type { enum reset_type {
RESET_TYPE_NONE = -1,
RESET_TYPE_INVISIBLE = 0, RESET_TYPE_INVISIBLE = 0,
RESET_TYPE_ALL = 1, RESET_TYPE_ALL = 1,
RESET_TYPE_WORLD = 2, RESET_TYPE_WORLD = 2,
......
...@@ -536,7 +536,7 @@ void falcon_reconfigure_mac_wrapper(struct efx_nic *efx) ...@@ -536,7 +536,7 @@ void falcon_reconfigure_mac_wrapper(struct efx_nic *efx)
efx_oword_t reg; efx_oword_t reg;
int link_speed, isolate; int link_speed, isolate;
isolate = (efx->reset_pending != RESET_TYPE_NONE); isolate = !!ACCESS_ONCE(efx->reset_pending);
switch (link_state->speed) { switch (link_state->speed) {
case 10000: link_speed = 3; break; case 10000: link_speed = 3; break;
......
...@@ -645,7 +645,7 @@ struct efx_filter_state; ...@@ -645,7 +645,7 @@ struct efx_filter_state;
* @irq_rx_moderation: IRQ moderation time for RX event queues * @irq_rx_moderation: IRQ moderation time for RX event queues
* @msg_enable: Log message enable flags * @msg_enable: Log message enable flags
* @state: Device state flag. Serialised by the rtnl_lock. * @state: Device state flag. Serialised by the rtnl_lock.
* @reset_pending: Pending reset method (normally RESET_TYPE_NONE) * @reset_pending: Bitmask for pending resets
* @tx_queue: TX DMA queues * @tx_queue: TX DMA queues
* @rx_queue: RX DMA queues * @rx_queue: RX DMA queues
* @channel: Channels * @channel: Channels
...@@ -728,7 +728,7 @@ struct efx_nic { ...@@ -728,7 +728,7 @@ struct efx_nic {
u32 msg_enable; u32 msg_enable;
enum nic_state state; enum nic_state state;
enum reset_type reset_pending; unsigned long reset_pending;
struct efx_channel *channel[EFX_MAX_CHANNELS]; struct efx_channel *channel[EFX_MAX_CHANNELS];
char channel_name[EFX_MAX_CHANNELS][IFNAMSIZ + 6]; char channel_name[EFX_MAX_CHANNELS][IFNAMSIZ + 6];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment