Commit a92fa25c, authored by Kleber Sacilotto de Souza and committed by James Bottomley

[SCSI] ipr: fix eeh recovery for 64-bit adapters

In some scenarios, an EEH error can take a long time to be detected, since the
driver issues an MMIO read only after a device reset command times out and we
try to reset the adapter. This patch adds some code in ipr_cancel_op() to read
a hardware register so we detect the error earlier in case the op is being
aborted because of a timeout caused by a frozen adapter slot.

Another problem in such scenarios is that __ipr_eh_host_reset() changes the dump
state flag from WAIT_FOR_DUMP to GET_DUMP, and the flag is later changed from
GET_DUMP to READ_DUMP in ipr_reset_restore_cfg_space(). However, if the PCI EEH
code has already called ipr_reset_restore_cfg_space() by the time the SCSI error
handling calls __ipr_eh_host_reset(), the flag ends up in an inconsistent state.
This patch also prevents this problem by updating the dump state only when a
reset/reload is not already in progress.
Signed-off-by: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>
Acked-by: Brian King <brking@linux.vnet.ibm.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
parent 7fbd7648
...@@ -4613,11 +4613,13 @@ static int __ipr_eh_host_reset(struct scsi_cmnd * scsi_cmd) ...@@ -4613,11 +4613,13 @@ static int __ipr_eh_host_reset(struct scsi_cmnd * scsi_cmd)
ENTER; ENTER;
ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata; ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata;
if (!ioa_cfg->in_reset_reload) {
dev_err(&ioa_cfg->pdev->dev, dev_err(&ioa_cfg->pdev->dev,
"Adapter being reset as a result of error recovery.\n"); "Adapter being reset as a result of error recovery.\n");
if (WAIT_FOR_DUMP == ioa_cfg->sdt_state) if (WAIT_FOR_DUMP == ioa_cfg->sdt_state)
ioa_cfg->sdt_state = GET_DUMP; ioa_cfg->sdt_state = GET_DUMP;
}
rc = ipr_reset_reload(ioa_cfg, IPR_SHUTDOWN_ABBREV); rc = ipr_reset_reload(ioa_cfg, IPR_SHUTDOWN_ABBREV);
...@@ -4907,7 +4909,7 @@ static int ipr_cancel_op(struct scsi_cmnd * scsi_cmd) ...@@ -4907,7 +4909,7 @@ static int ipr_cancel_op(struct scsi_cmnd * scsi_cmd)
struct ipr_ioa_cfg *ioa_cfg; struct ipr_ioa_cfg *ioa_cfg;
struct ipr_resource_entry *res; struct ipr_resource_entry *res;
struct ipr_cmd_pkt *cmd_pkt; struct ipr_cmd_pkt *cmd_pkt;
u32 ioasc; u32 ioasc, int_reg;
int op_found = 0; int op_found = 0;
ENTER; ENTER;
...@@ -4920,7 +4922,17 @@ static int ipr_cancel_op(struct scsi_cmnd * scsi_cmd) ...@@ -4920,7 +4922,17 @@ static int ipr_cancel_op(struct scsi_cmnd * scsi_cmd)
*/ */
if (ioa_cfg->in_reset_reload || ioa_cfg->ioa_is_dead) if (ioa_cfg->in_reset_reload || ioa_cfg->ioa_is_dead)
return FAILED; return FAILED;
if (!res || !ipr_is_gscsi(res)) if (!res)
return FAILED;
/*
* If we are aborting a timed out op, chances are that the timeout was caused
* by a still not detected EEH error. In such cases, reading a register will
* trigger the EEH recovery infrastructure.
*/
int_reg = readl(ioa_cfg->regs.sense_interrupt_reg);
if (!ipr_is_gscsi(res))
return FAILED; return FAILED;
list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment