Commit f4ba4e55, authored by Manish Rangankar, committed by Martin K. Petersen

scsi: qedi: Add firmware error recovery invocation support

Add support to initiate MFW process recovery for all the devices if the
storage function receives the event first.

Also add a fix for the following kernel test robot warning:

>> drivers/scsi/qedi/qedi_main.c:1119:6: warning: no previous prototype
>> for 'qedi_schedule_hw_err_handler' [-Wmissing-prototypes]

Link: https://lore.kernel.org/r/20200908095657.26821-8-mrangankar@marvell.com
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Manish Rangankar <mrangankar@marvell.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 4118879b
...@@ -274,6 +274,10 @@ struct qedi_ctx { ...@@ -274,6 +274,10 @@ struct qedi_ctx {
spinlock_t ll2_lock; /* Light L2 lock */ spinlock_t ll2_lock; /* Light L2 lock */
spinlock_t hba_lock; /* per port lock */ spinlock_t hba_lock; /* per port lock */
struct task_struct *ll2_recv_thread; struct task_struct *ll2_recv_thread;
unsigned long qedi_err_flags;
#define QEDI_ERR_ATTN_CLR_EN 0
#define QEDI_ERR_IS_RECOVERABLE 2
#define QEDI_ERR_OVERRIDE_EN 31
unsigned long flags; unsigned long flags;
#define UIO_DEV_OPENED 1 #define UIO_DEV_OPENED 1
#define QEDI_IOTHREAD_WAKE 2 #define QEDI_IOTHREAD_WAKE 2
......
...@@ -1267,7 +1267,8 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn, ...@@ -1267,7 +1267,8 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn,
rval = wait_event_interruptible_timeout(qedi_conn->wait_queue, rval = wait_event_interruptible_timeout(qedi_conn->wait_queue,
((qedi_conn->cmd_cleanup_req == ((qedi_conn->cmd_cleanup_req ==
qedi_conn->cmd_cleanup_cmpl) || qedi_conn->cmd_cleanup_cmpl) ||
qedi_conn->ep), test_bit(QEDI_IN_RECOVERY,
&qedi->flags)),
5 * HZ); 5 * HZ);
if (rval) { if (rval) {
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM, QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_SCSI_TM,
...@@ -1292,7 +1293,9 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn, ...@@ -1292,7 +1293,9 @@ int qedi_cleanup_all_io(struct qedi_ctx *qedi, struct qedi_conn *qedi_conn,
/* Enable IOs for all other sessions except current.*/ /* Enable IOs for all other sessions except current.*/
if (!wait_event_interruptible_timeout(qedi_conn->wait_queue, if (!wait_event_interruptible_timeout(qedi_conn->wait_queue,
(qedi_conn->cmd_cleanup_req == (qedi_conn->cmd_cleanup_req ==
qedi_conn->cmd_cleanup_cmpl), qedi_conn->cmd_cleanup_cmpl) ||
test_bit(QEDI_IN_RECOVERY,
&qedi->flags),
5 * HZ)) { 5 * HZ)) {
iscsi_host_for_each_session(qedi->shost, iscsi_host_for_each_session(qedi->shost,
qedi_mark_device_available); qedi_mark_device_available);
......
...@@ -1072,7 +1072,8 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) ...@@ -1072,7 +1072,8 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep)
qedi_ep->state = EP_STATE_DISCONN_START; qedi_ep->state = EP_STATE_DISCONN_START;
if (test_bit(QEDI_IN_SHUTDOWN, &qedi->flags)) if (test_bit(QEDI_IN_SHUTDOWN, &qedi->flags) ||
test_bit(QEDI_IN_RECOVERY, &qedi->flags))
goto ep_release_conn; goto ep_release_conn;
ret = qedi_ops->destroy_conn(qedi->cdev, qedi_ep->handle, abrt_conn); ret = qedi_ops->destroy_conn(qedi->cdev, qedi_ep->handle, abrt_conn);
......
...@@ -50,6 +50,10 @@ module_param(qedi_ll2_buf_size, uint, 0644); ...@@ -50,6 +50,10 @@ module_param(qedi_ll2_buf_size, uint, 0644);
MODULE_PARM_DESC(qedi_ll2_buf_size, MODULE_PARM_DESC(qedi_ll2_buf_size,
"parameter to set ping packet size, default - 0x400, Jumbo packets - 0x2400."); "parameter to set ping packet size, default - 0x400, Jumbo packets - 0x2400.");
/*
 * Module parameter that can replace the per-adapter MFW error flags.
 * qedi_schedule_hw_err_handler() copies this value into
 * qedi->qedi_err_flags only when bit QEDI_ERR_OVERRIDE_EN (31) is set in it;
 * otherwise the value is ignored. The remaining bits use the QEDI_ERR_*
 * bit numbers defined alongside qedi_err_flags.
 */
static uint qedi_flags_override;
module_param(qedi_flags_override, uint, 0644);
MODULE_PARM_DESC(qedi_flags_override, "Disable/Enable MFW error flags bits action.");
const struct qed_iscsi_ops *qedi_ops; const struct qed_iscsi_ops *qedi_ops;
static struct scsi_transport_template *qedi_scsi_transport; static struct scsi_transport_template *qedi_scsi_transport;
static struct pci_driver qedi_pci_driver; static struct pci_driver qedi_pci_driver;
...@@ -63,6 +67,8 @@ static void qedi_reset_uio_rings(struct qedi_uio_dev *udev); ...@@ -63,6 +67,8 @@ static void qedi_reset_uio_rings(struct qedi_uio_dev *udev);
static void qedi_ll2_free_skbs(struct qedi_ctx *qedi); static void qedi_ll2_free_skbs(struct qedi_ctx *qedi);
static struct nvm_iscsi_block *qedi_get_nvram_block(struct qedi_ctx *qedi); static struct nvm_iscsi_block *qedi_get_nvram_block(struct qedi_ctx *qedi);
static void qedi_recovery_handler(struct work_struct *work); static void qedi_recovery_handler(struct work_struct *work);
static void qedi_schedule_hw_err_handler(void *dev,
enum qed_hw_err_type err_type);
static int qedi_iscsi_event_cb(void *context, u8 fw_event_code, void *fw_handle) static int qedi_iscsi_event_cb(void *context, u8 fw_event_code, void *fw_handle)
{ {
...@@ -1112,6 +1118,39 @@ static void qedi_get_protocol_tlv_data(void *dev, void *data) ...@@ -1112,6 +1118,39 @@ static void qedi_get_protocol_tlv_data(void *dev, void *data)
return; return;
} }
/**
 * qedi_schedule_hw_err_handler - act on a reported hardware error
 * @dev: opaque callback cookie, interpreted as a struct qedi_ctx pointer
 * @err_type: kind of hardware error being reported
 *
 * Installed as the .schedule_hw_err_handler callback in qedi_cb_ops.
 *
 * If the qedi_flags_override module parameter has its QEDI_ERR_OVERRIDE_EN
 * bit set, its value replaces the adapter's qedi_err_flags. The event is then
 * logged and, for the error types listed in the switch below:
 *   - attn_clr_enable() is called when QEDI_ERR_ATTN_CLR_EN is set;
 *   - recovery_process() is called for QED_HW_ERR_RAMROD_FAIL when
 *     QEDI_ERR_IS_RECOVERABLE is set.
 * Any other error type is only logged.
 */
static void qedi_schedule_hw_err_handler(void *dev,
					 enum qed_hw_err_type err_type)
{
	struct qedi_ctx *qedi = dev;
	/*
	 * The module parameter is writable at runtime; take one snapshot so
	 * the value that is tested is also the value that is stored.
	 */
	unsigned long override_flags = qedi_flags_override;

	if (test_bit(QEDI_ERR_OVERRIDE_EN, &override_flags))
		qedi->qedi_err_flags = override_flags;

	/* qedi_err_flags is unsigned long, hence %lx rather than %x. */
	QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
		  "HW error handler scheduled, err=%d err_flags=0x%lx\n",
		  err_type, qedi->qedi_err_flags);

	switch (err_type) {
	case QED_HW_ERR_MFW_RESP_FAIL:
	case QED_HW_ERR_HW_ATTN:
	case QED_HW_ERR_DMAE_FAIL:
	case QED_HW_ERR_RAMROD_FAIL:
	case QED_HW_ERR_FW_ASSERT:
		/* Prevent HW attentions from being reasserted */
		if (test_bit(QEDI_ERR_ATTN_CLR_EN, &qedi->qedi_err_flags))
			qedi_ops->common->attn_clr_enable(qedi->cdev, true);

		/* Only a ramrod failure triggers the recovery process. */
		if (err_type == QED_HW_ERR_RAMROD_FAIL &&
		    test_bit(QEDI_ERR_IS_RECOVERABLE, &qedi->qedi_err_flags))
			qedi_ops->common->recovery_process(qedi->cdev);

		break;
	default:
		break;
	}
}
static void qedi_schedule_recovery_handler(void *dev) static void qedi_schedule_recovery_handler(void *dev)
{ {
struct qedi_ctx *qedi = dev; struct qedi_ctx *qedi = dev;
...@@ -1154,6 +1193,7 @@ static struct qed_iscsi_cb_ops qedi_cb_ops = { ...@@ -1154,6 +1193,7 @@ static struct qed_iscsi_cb_ops qedi_cb_ops = {
{ {
.link_update = qedi_link_update, .link_update = qedi_link_update,
.schedule_recovery_handler = qedi_schedule_recovery_handler, .schedule_recovery_handler = qedi_schedule_recovery_handler,
.schedule_hw_err_handler = qedi_schedule_hw_err_handler,
.get_protocol_tlv_data = qedi_get_protocol_tlv_data, .get_protocol_tlv_data = qedi_get_protocol_tlv_data,
.get_generic_tlv_data = qedi_get_generic_tlv_data, .get_generic_tlv_data = qedi_get_generic_tlv_data,
} }
...@@ -2354,6 +2394,7 @@ static void __qedi_remove(struct pci_dev *pdev, int mode) ...@@ -2354,6 +2394,7 @@ static void __qedi_remove(struct pci_dev *pdev, int mode)
{ {
struct qedi_ctx *qedi = pci_get_drvdata(pdev); struct qedi_ctx *qedi = pci_get_drvdata(pdev);
int rval; int rval;
u16 retry = 10;
if (mode == QEDI_MODE_SHUTDOWN) if (mode == QEDI_MODE_SHUTDOWN)
iscsi_host_for_each_session(qedi->shost, iscsi_host_for_each_session(qedi->shost,
...@@ -2382,7 +2423,13 @@ static void __qedi_remove(struct pci_dev *pdev, int mode) ...@@ -2382,7 +2423,13 @@ static void __qedi_remove(struct pci_dev *pdev, int mode)
qedi_sync_free_irqs(qedi); qedi_sync_free_irqs(qedi);
if (!test_bit(QEDI_IN_OFFLINE, &qedi->flags)) { if (!test_bit(QEDI_IN_OFFLINE, &qedi->flags)) {
qedi_ops->stop(qedi->cdev); while (retry--) {
rval = qedi_ops->stop(qedi->cdev);
if (rval < 0)
msleep(1000);
else
break;
}
qedi_ops->ll2->stop(qedi->cdev); qedi_ops->ll2->stop(qedi->cdev);
} }
...@@ -2441,6 +2488,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) ...@@ -2441,6 +2488,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
struct qed_probe_params qed_params; struct qed_probe_params qed_params;
void *task_start, *task_end; void *task_start, *task_end;
int rc; int rc;
u16 retry = 10;
if (mode != QEDI_MODE_RECOVERY) { if (mode != QEDI_MODE_RECOVERY) {
qedi = qedi_host_alloc(pdev); qedi = qedi_host_alloc(pdev);
...@@ -2452,6 +2500,10 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) ...@@ -2452,6 +2500,10 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
qedi = pci_get_drvdata(pdev); qedi = pci_get_drvdata(pdev);
} }
retry_probe:
if (mode == QEDI_MODE_RECOVERY)
msleep(2000);
memset(&qed_params, 0, sizeof(qed_params)); memset(&qed_params, 0, sizeof(qed_params));
qed_params.protocol = QED_PROTOCOL_ISCSI; qed_params.protocol = QED_PROTOCOL_ISCSI;
qed_params.dp_module = qedi_qed_debug; qed_params.dp_module = qedi_qed_debug;
...@@ -2459,11 +2511,20 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) ...@@ -2459,11 +2511,20 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
qed_params.is_vf = is_vf; qed_params.is_vf = is_vf;
qedi->cdev = qedi_ops->common->probe(pdev, &qed_params); qedi->cdev = qedi_ops->common->probe(pdev, &qed_params);
if (!qedi->cdev) { if (!qedi->cdev) {
if (mode == QEDI_MODE_RECOVERY && retry) {
QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
"Retry %d initialize hardware\n", retry);
retry--;
goto retry_probe;
}
rc = -ENODEV; rc = -ENODEV;
QEDI_ERR(&qedi->dbg_ctx, "Cannot initialize hardware\n"); QEDI_ERR(&qedi->dbg_ctx, "Cannot initialize hardware\n");
goto free_host; goto free_host;
} }
set_bit(QEDI_ERR_ATTN_CLR_EN, &qedi->qedi_err_flags);
set_bit(QEDI_ERR_IS_RECOVERABLE, &qedi->qedi_err_flags);
atomic_set(&qedi->link_state, QEDI_LINK_DOWN); atomic_set(&qedi->link_state, QEDI_LINK_DOWN);
rc = qedi_ops->fill_dev_info(qedi->cdev, &qedi->dev_info); rc = qedi_ops->fill_dev_info(qedi->cdev, &qedi->dev_info);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment