Commit 219d27d7 authored by Bart Van Assche's avatar Bart Van Assche Committed by Martin K. Petersen

scsi: qla2xxx: Fix race conditions in the code for aborting SCSI commands

In the *_done() functions, instead of returning early if sp->ref_count >=
2, only decrement sp->ref_count. In qla2xxx_eh_abort(), instead of deciding
what to do based on the value of sp->ref_count, decide which action to take
depending on the completion status of the firmware abort. Remove srb.cwaitq
and use srb.comp instead. In qla2x00_abort_srb(), call
isp_ops->abort_command() directly instead of calling qla2xxx_eh_abort().

Cc: Himanshu Madhani <hmadhani@marvell.com>
Cc: Giridhar Malavali <gmalavali@marvell.com>
Signed-off-by: default avatarBart Van Assche <bvanassche@acm.org>
Acked-by: default avatarHimanshu Madhani <hmadhani@marvell.com>
Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
parent 982cc4be
...@@ -546,7 +546,6 @@ typedef struct srb { ...@@ -546,7 +546,6 @@ typedef struct srb {
int rc; int rc;
int retry_count; int retry_count;
struct completion *comp; struct completion *comp;
wait_queue_head_t *cwaitq;
union { union {
struct srb_iocb iocb_cmd; struct srb_iocb iocb_cmd;
struct bsg_job *bsg_job; struct bsg_job *bsg_job;
......
...@@ -137,8 +137,7 @@ static void qla_nvme_sp_ls_done(void *ptr, int res) ...@@ -137,8 +137,7 @@ static void qla_nvme_sp_ls_done(void *ptr, int res)
return; return;
} }
if (!atomic_dec_and_test(&sp->ref_count)) atomic_dec(&sp->ref_count);
return;
if (res) if (res)
res = -EINVAL; res = -EINVAL;
...@@ -161,8 +160,7 @@ static void qla_nvme_sp_done(void *ptr, int res) ...@@ -161,8 +160,7 @@ static void qla_nvme_sp_done(void *ptr, int res)
nvme = &sp->u.iocb_cmd; nvme = &sp->u.iocb_cmd;
fd = nvme->u.nvme.desc; fd = nvme->u.nvme.desc;
if (!atomic_dec_and_test(&sp->ref_count)) atomic_dec(&sp->ref_count);
return;
if (res == QLA_SUCCESS) { if (res == QLA_SUCCESS) {
fd->rcv_rsplen = nvme->u.nvme.rsp_pyld_len; fd->rcv_rsplen = nvme->u.nvme.rsp_pyld_len;
...@@ -599,34 +597,6 @@ static struct nvme_fc_port_template qla_nvme_fc_transport = { ...@@ -599,34 +597,6 @@ static struct nvme_fc_port_template qla_nvme_fc_transport = {
.fcprqst_priv_sz = sizeof(struct nvme_private), .fcprqst_priv_sz = sizeof(struct nvme_private),
}; };
#define NVME_ABORT_POLLING_PERIOD 2
static int qla_nvme_wait_on_command(srb_t *sp)
{
int ret = QLA_SUCCESS;
wait_event_timeout(sp->nvme_ls_waitq, (atomic_read(&sp->ref_count) > 1),
NVME_ABORT_POLLING_PERIOD*HZ);
if (atomic_read(&sp->ref_count) > 1)
ret = QLA_FUNCTION_FAILED;
return ret;
}
void qla_nvme_abort(struct qla_hw_data *ha, struct srb *sp, int res)
{
int rval;
if (ha->flags.fw_started) {
rval = ha->isp_ops->abort_command(sp);
if (!rval && !qla_nvme_wait_on_command(sp))
ql_log(ql_log_warn, NULL, 0x2112,
"timed out waiting on sp=%p\n", sp);
} else {
sp->done(sp, res);
}
}
static void qla_nvme_unregister_remote_port(struct work_struct *work) static void qla_nvme_unregister_remote_port(struct work_struct *work)
{ {
struct fc_port *fcport = container_of(work, struct fc_port, struct fc_port *fcport = container_of(work, struct fc_port,
......
...@@ -145,7 +145,6 @@ struct pt_ls4_rx_unsol { ...@@ -145,7 +145,6 @@ struct pt_ls4_rx_unsol {
int qla_nvme_register_hba(struct scsi_qla_host *); int qla_nvme_register_hba(struct scsi_qla_host *);
int qla_nvme_register_remote(struct scsi_qla_host *, struct fc_port *); int qla_nvme_register_remote(struct scsi_qla_host *, struct fc_port *);
void qla_nvme_delete(struct scsi_qla_host *); void qla_nvme_delete(struct scsi_qla_host *);
void qla_nvme_abort(struct qla_hw_data *, struct srb *sp, int res);
void qla24xx_nvme_ls4_iocb(struct scsi_qla_host *, struct pt_ls4_request *, void qla24xx_nvme_ls4_iocb(struct scsi_qla_host *, struct pt_ls4_request *,
struct req_que *); struct req_que *);
void qla24xx_async_gffid_sp_done(void *, int); void qla24xx_async_gffid_sp_done(void *, int);
......
...@@ -714,7 +714,7 @@ qla2x00_sp_compl(void *ptr, int res) ...@@ -714,7 +714,7 @@ qla2x00_sp_compl(void *ptr, int res)
{ {
srb_t *sp = ptr; srb_t *sp = ptr;
struct scsi_cmnd *cmd = GET_CMD_SP(sp); struct scsi_cmnd *cmd = GET_CMD_SP(sp);
wait_queue_head_t *cwaitq = sp->cwaitq; struct completion *comp = sp->comp;
if (atomic_read(&sp->ref_count) == 0) { if (atomic_read(&sp->ref_count) == 0) {
ql_dbg(ql_dbg_io, sp->vha, 0x3015, ql_dbg(ql_dbg_io, sp->vha, 0x3015,
...@@ -724,15 +724,15 @@ qla2x00_sp_compl(void *ptr, int res) ...@@ -724,15 +724,15 @@ qla2x00_sp_compl(void *ptr, int res)
WARN_ON(atomic_read(&sp->ref_count) == 0); WARN_ON(atomic_read(&sp->ref_count) == 0);
return; return;
} }
if (!atomic_dec_and_test(&sp->ref_count))
return; atomic_dec(&sp->ref_count);
sp->free(sp); sp->free(sp);
cmd->result = res; cmd->result = res;
CMD_SP(cmd) = NULL; CMD_SP(cmd) = NULL;
cmd->scsi_done(cmd); cmd->scsi_done(cmd);
if (cwaitq) if (comp)
wake_up(cwaitq); complete(comp);
qla2x00_rel_sp(sp); qla2x00_rel_sp(sp);
} }
...@@ -825,7 +825,7 @@ qla2xxx_qpair_sp_compl(void *ptr, int res) ...@@ -825,7 +825,7 @@ qla2xxx_qpair_sp_compl(void *ptr, int res)
{ {
srb_t *sp = ptr; srb_t *sp = ptr;
struct scsi_cmnd *cmd = GET_CMD_SP(sp); struct scsi_cmnd *cmd = GET_CMD_SP(sp);
wait_queue_head_t *cwaitq = sp->cwaitq; struct completion *comp = sp->comp;
if (atomic_read(&sp->ref_count) == 0) { if (atomic_read(&sp->ref_count) == 0) {
ql_dbg(ql_dbg_io, sp->fcport->vha, 0x3079, ql_dbg(ql_dbg_io, sp->fcport->vha, 0x3079,
...@@ -835,15 +835,15 @@ qla2xxx_qpair_sp_compl(void *ptr, int res) ...@@ -835,15 +835,15 @@ qla2xxx_qpair_sp_compl(void *ptr, int res)
WARN_ON(atomic_read(&sp->ref_count) == 0); WARN_ON(atomic_read(&sp->ref_count) == 0);
return; return;
} }
if (!atomic_dec_and_test(&sp->ref_count))
return; atomic_dec(&sp->ref_count);
sp->free(sp); sp->free(sp);
cmd->result = res; cmd->result = res;
CMD_SP(cmd) = NULL; CMD_SP(cmd) = NULL;
cmd->scsi_done(cmd); cmd->scsi_done(cmd);
if (cwaitq) if (comp)
wake_up(cwaitq); complete(comp);
qla2xxx_rel_qpair_sp(sp->qpair, sp); qla2xxx_rel_qpair_sp(sp->qpair, sp);
} }
...@@ -1286,7 +1286,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) ...@@ -1286,7 +1286,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
unsigned int id; unsigned int id;
uint64_t lun; uint64_t lun;
unsigned long flags; unsigned long flags;
int rval, wait = 0; int rval;
struct qla_hw_data *ha = vha->hw; struct qla_hw_data *ha = vha->hw;
struct qla_qpair *qpair; struct qla_qpair *qpair;
...@@ -1299,7 +1299,6 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) ...@@ -1299,7 +1299,6 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
ret = fc_block_scsi_eh(cmd); ret = fc_block_scsi_eh(cmd);
if (ret != 0) if (ret != 0)
return ret; return ret;
ret = SUCCESS;
sp = (srb_t *) CMD_SP(cmd); sp = (srb_t *) CMD_SP(cmd);
if (!sp) if (!sp)
...@@ -1310,7 +1309,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) ...@@ -1310,7 +1309,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
return SUCCESS; return SUCCESS;
spin_lock_irqsave(qpair->qp_lock_ptr, flags); spin_lock_irqsave(qpair->qp_lock_ptr, flags);
if (!CMD_SP(cmd)) { if (sp->type != SRB_SCSI_CMD || GET_CMD_SP(sp) != cmd) {
/* there's a chance an interrupt could clear /* there's a chance an interrupt could clear
the ptr as part of done & free */ the ptr as part of done & free */
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
...@@ -1331,66 +1330,31 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) ...@@ -1331,66 +1330,31 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
"Aborting from RISC nexus=%ld:%d:%llu sp=%p cmd=%p handle=%x\n", "Aborting from RISC nexus=%ld:%d:%llu sp=%p cmd=%p handle=%x\n",
vha->host_no, id, lun, sp, cmd, sp->handle); vha->host_no, id, lun, sp, cmd, sp->handle);
/* Get a reference to the sp and drop the lock.*/
rval = ha->isp_ops->abort_command(sp); rval = ha->isp_ops->abort_command(sp);
if (rval) { ql_dbg(ql_dbg_taskm, vha, 0x8003,
if (rval == QLA_FUNCTION_PARAMETER_ERROR) "Abort command mbx cmd=%p, rval=%x.\n", cmd, rval);
ret = SUCCESS;
else
ret = FAILED;
ql_dbg(ql_dbg_taskm, vha, 0x8003,
"Abort command mbx failed cmd=%p, rval=%x.\n", cmd, rval);
} else {
ql_dbg(ql_dbg_taskm, vha, 0x8004,
"Abort command mbx success cmd=%p.\n", cmd);
wait = 1;
}
spin_lock_irqsave(qpair->qp_lock_ptr, flags); switch (rval) {
case QLA_SUCCESS:
/*
* Releasing of the SRB and associated command resources
* is managed through ref_count.
* Whether we need to wait for the abort completion or complete
* the abort handler should be based on the ref_count.
*/
if (atomic_read(&sp->ref_count) > 1) {
/* /*
* The command is not yet completed. We need to wait for either * The command has been aborted. That means that the firmware
* command completion or abort completion. * won't report a completion.
*/ */
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(eh_waitq); sp->done(sp, DID_ABORT << 16);
uint32_t ratov = ha->r_a_tov/10; ret = SUCCESS;
break;
/* Go ahead and release the extra ref_count obtained earlier */ default:
sp->done(sp, DID_RESET << 16); /*
sp->cwaitq = &eh_waitq; * Either abort failed or abort and completion raced. Let
* the SCSI core retry the abort in the former case.
if (!wait_event_lock_irq_timeout(eh_waitq, */
CMD_SP(cmd) == NULL, *qpair->qp_lock_ptr, ret = FAILED;
msecs_to_jiffies(4 * ratov * 1000))) { break;
/*
* The abort got dropped, LOGO will be sent and the
* original command will be completed with CS_TIMEOUT
* completion
*/
ql_dbg(ql_dbg_taskm, vha, 0xffff,
"%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n",
__func__, ha->r_a_tov);
sp->cwaitq = NULL;
ret = FAILED;
goto end;
}
} else {
/* Command completed while processing the abort */
sp->done(sp, DID_RESET << 16);
} }
end:
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
ql_log(ql_log_info, vha, 0x801c, ql_log(ql_log_info, vha, 0x801c,
"Abort command issued nexus=%ld:%d:%llu -- %d %x.\n", "Abort command issued nexus=%ld:%d:%llu -- %x.\n",
vha->host_no, id, lun, wait, ret); vha->host_no, id, lun, ret);
return ret; return ret;
} }
...@@ -1766,42 +1730,34 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res, ...@@ -1766,42 +1730,34 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
__releases(qp->qp_lock_ptr) __releases(qp->qp_lock_ptr)
__acquires(qp->qp_lock_ptr) __acquires(qp->qp_lock_ptr)
{ {
DECLARE_COMPLETION_ONSTACK(comp);
scsi_qla_host_t *vha = qp->vha; scsi_qla_host_t *vha = qp->vha;
struct qla_hw_data *ha = vha->hw; struct qla_hw_data *ha = vha->hw;
int rval;
if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS) { if (sp_get(sp))
if (!sp_get(sp)) { return;
/* got sp */
spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
qla_nvme_abort(ha, sp, res);
spin_lock_irqsave(qp->qp_lock_ptr, *flags);
}
} else if (GET_CMD_SP(sp) && !ha->flags.eeh_busy &&
!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
!qla2x00_isp_reg_stat(ha) && sp->type == SRB_SCSI_CMD) {
/*
* Don't abort commands in adapter during EEH recovery as it's
* not accessible/responding.
*
* Get a reference to the sp and drop the lock. The reference
* ensures this sp->done() call and not the call in
* qla2xxx_eh_abort() ends the SCSI cmd (with result 'res').
*/
if (!sp_get(sp)) {
int status;
spin_unlock_irqrestore(qp->qp_lock_ptr, *flags); if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS ||
status = qla2xxx_eh_abort(GET_CMD_SP(sp)); (sp->type == SRB_SCSI_CMD && !ha->flags.eeh_busy &&
spin_lock_irqsave(qp->qp_lock_ptr, *flags); !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
/* !qla2x00_isp_reg_stat(ha))) {
* Get rid of extra reference caused sp->comp = &comp;
* by early exit from qla2xxx_eh_abort rval = ha->isp_ops->abort_command(sp);
*/ spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
if (status == FAST_IO_FAIL)
atomic_dec(&sp->ref_count); switch (rval) {
case QLA_SUCCESS:
sp->done(sp, res);
break;
case QLA_FUNCTION_PARAMETER_ERROR:
wait_for_completion(&comp);
break;
} }
spin_lock_irqsave(qp->qp_lock_ptr, *flags);
sp->comp = NULL;
} }
sp->done(sp, res);
} }
static void static void
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment