Commit 9e73ed2e authored by Suganath Prabu S's avatar Suganath Prabu S Committed by Martin K. Petersen

scsi: mpt3sas: Cancel the running work during host reset

It is not recommended to issue back-to-back host reset without any delay.
However, if someone issues back-to-back host reset then we observe that
target devices get unregistered and re-register with SML.  And if OS drive
is behind the HBA when it gets unregistered, then file-system goes into
read-only mode.

Normally during host reset, driver marks accessible target devices as
responding and triggers the event MPT3SAS_REMOVE_UNRESPONDING_DEVICES to
remove any non-responding devices through FW worker thread. While
processing this event, driver unregisters the non-responding devices and
clears the responding flag for all the devices.

Currently, during host reset, driver is cancelling only those Firmware
event works which are pending in Firmware event workqueue. It is not
cancelling work which is currently running. Change the driver to cancel all
events.

Link: https://lore.kernel.org/r/1596096229-3341-4-git-send-email-suganath-prabu.subramani@broadcom.comSigned-off-by: default avatarSuganath Prabu S <suganath-prabu.subramani@broadcom.com>
Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
parent af6ec1ee
...@@ -1036,6 +1036,8 @@ typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc); ...@@ -1036,6 +1036,8 @@ typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc);
* @firmware_event_thread: "" * @firmware_event_thread: ""
* @fw_event_lock: * @fw_event_lock:
* @fw_event_list: list of fw events * @fw_event_list: list of fw events
* @current_evet: current processing firmware event
* @fw_event_cleanup: set to one while cleaning up the fw events
* @aen_event_read_flag: event log was read * @aen_event_read_flag: event log was read
* @broadcast_aen_busy: broadcast aen waiting to be serviced * @broadcast_aen_busy: broadcast aen waiting to be serviced
* @shost_recovery: host reset in progress * @shost_recovery: host reset in progress
...@@ -1217,6 +1219,8 @@ struct MPT3SAS_ADAPTER { ...@@ -1217,6 +1219,8 @@ struct MPT3SAS_ADAPTER {
struct workqueue_struct *firmware_event_thread; struct workqueue_struct *firmware_event_thread;
spinlock_t fw_event_lock; spinlock_t fw_event_lock;
struct list_head fw_event_list; struct list_head fw_event_list;
struct fw_event_work *current_event;
u8 fw_events_cleanup;
/* misc flags */ /* misc flags */
int aen_event_read_flag; int aen_event_read_flag;
......
...@@ -3323,11 +3323,13 @@ _scsih_fw_event_cleanup_queue(struct MPT3SAS_ADAPTER *ioc) ...@@ -3323,11 +3323,13 @@ _scsih_fw_event_cleanup_queue(struct MPT3SAS_ADAPTER *ioc)
{ {
struct fw_event_work *fw_event; struct fw_event_work *fw_event;
if (list_empty(&ioc->fw_event_list) || if ((list_empty(&ioc->fw_event_list) && !ioc->current_event) ||
!ioc->firmware_event_thread || in_interrupt()) !ioc->firmware_event_thread || in_interrupt())
return; return;
while ((fw_event = dequeue_next_fw_event(ioc))) { ioc->fw_events_cleanup = 1;
while ((fw_event = dequeue_next_fw_event(ioc)) ||
(fw_event = ioc->current_event)) {
/* /*
* Wait on the fw_event to complete. If this returns 1, then * Wait on the fw_event to complete. If this returns 1, then
* the event was never executed, and we need a put for the * the event was never executed, and we need a put for the
...@@ -3341,6 +3343,7 @@ _scsih_fw_event_cleanup_queue(struct MPT3SAS_ADAPTER *ioc) ...@@ -3341,6 +3343,7 @@ _scsih_fw_event_cleanup_queue(struct MPT3SAS_ADAPTER *ioc)
fw_event_work_put(fw_event); fw_event_work_put(fw_event);
} }
ioc->fw_events_cleanup = 0;
} }
/** /**
...@@ -9421,11 +9424,13 @@ mpt3sas_scsih_reset_done_handler(struct MPT3SAS_ADAPTER *ioc) ...@@ -9421,11 +9424,13 @@ mpt3sas_scsih_reset_done_handler(struct MPT3SAS_ADAPTER *ioc)
static void static void
_mpt3sas_fw_work(struct MPT3SAS_ADAPTER *ioc, struct fw_event_work *fw_event) _mpt3sas_fw_work(struct MPT3SAS_ADAPTER *ioc, struct fw_event_work *fw_event)
{ {
ioc->current_event = fw_event;
_scsih_fw_event_del_from_list(ioc, fw_event); _scsih_fw_event_del_from_list(ioc, fw_event);
/* the queue is being flushed so ignore this event */ /* the queue is being flushed so ignore this event */
if (ioc->remove_host || ioc->pci_error_recovery) { if (ioc->remove_host || ioc->pci_error_recovery) {
fw_event_work_put(fw_event); fw_event_work_put(fw_event);
ioc->current_event = NULL;
return; return;
} }
...@@ -9439,10 +9444,10 @@ _mpt3sas_fw_work(struct MPT3SAS_ADAPTER *ioc, struct fw_event_work *fw_event) ...@@ -9439,10 +9444,10 @@ _mpt3sas_fw_work(struct MPT3SAS_ADAPTER *ioc, struct fw_event_work *fw_event)
while (scsi_host_in_recovery(ioc->shost) || while (scsi_host_in_recovery(ioc->shost) ||
ioc->shost_recovery) { ioc->shost_recovery) {
/* /*
* If we're unloading, bail. Otherwise, this can become * If we're unloading or cancelling the work, bail.
* an infinite loop. * Otherwise, this can become an infinite loop.
*/ */
if (ioc->remove_host) if (ioc->remove_host || ioc->fw_events_cleanup)
goto out; goto out;
ssleep(1); ssleep(1);
} }
...@@ -9503,11 +9508,13 @@ _mpt3sas_fw_work(struct MPT3SAS_ADAPTER *ioc, struct fw_event_work *fw_event) ...@@ -9503,11 +9508,13 @@ _mpt3sas_fw_work(struct MPT3SAS_ADAPTER *ioc, struct fw_event_work *fw_event)
break; break;
case MPI2_EVENT_PCIE_TOPOLOGY_CHANGE_LIST: case MPI2_EVENT_PCIE_TOPOLOGY_CHANGE_LIST:
_scsih_pcie_topology_change_event(ioc, fw_event); _scsih_pcie_topology_change_event(ioc, fw_event);
ioc->current_event = NULL;
return; return;
break; break;
} }
out: out:
fw_event_work_put(fw_event); fw_event_work_put(fw_event);
ioc->current_event = NULL;
} }
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment