Commit f45bca8c authored by Quinn Tran's avatar Quinn Tran Committed by Martin K. Petersen

scsi: qla2xxx: Fix double scsi_done for abort path

Current code assumes abort will remove the original command from the active
list where scsi_done will not be called. Instead, the eh_abort thread will
do the scsi_done. That is not the case.  Instead, we have a double
scsi_done calls triggering use after free.

Abort will tell FW to release the command from FW possesion. The original
command will return to ULP with error in its normal fashion via scsi_done.
eh_abort path would wait for the original command completion before
returning.  eh_abort path will not perform the scsi_done call.

Fixes: 219d27d7 ("scsi: qla2xxx: Fix race conditions in the code for aborting SCSI commands")
Cc: stable@vger.kernel.org # 5.2
Link: https://lore.kernel.org/r/20191105150657.8092-6-hmadhani@marvell.comReviewed-by: default avatarEwan D. Milne <emilne@redhat.com>
Signed-off-by: default avatarQuinn Tran <qutran@marvell.com>
Signed-off-by: default avatarArun Easi <aeasi@marvell.com>
Signed-off-by: default avatarHimanshu Madhani <hmadhani@marvell.com>
Signed-off-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
parent dd322b7f
...@@ -591,13 +591,16 @@ typedef struct srb { ...@@ -591,13 +591,16 @@ typedef struct srb {
*/ */
uint8_t cmd_type; uint8_t cmd_type;
uint8_t pad[3]; uint8_t pad[3];
atomic_t ref_count;
struct kref cmd_kref; /* need to migrate ref_count over to this */ struct kref cmd_kref; /* need to migrate ref_count over to this */
void *priv; void *priv;
wait_queue_head_t nvme_ls_waitq; wait_queue_head_t nvme_ls_waitq;
struct fc_port *fcport; struct fc_port *fcport;
struct scsi_qla_host *vha; struct scsi_qla_host *vha;
unsigned int start_timer:1; unsigned int start_timer:1;
unsigned int abort:1;
unsigned int aborted:1;
unsigned int completed:1;
uint32_t handle; uint32_t handle;
uint16_t flags; uint16_t flags;
uint16_t type; uint16_t type;
......
...@@ -2487,6 +2487,11 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) ...@@ -2487,6 +2487,11 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
return; return;
} }
if (sp->abort)
sp->aborted = 1;
else
sp->completed = 1;
if (sp->cmd_type != TYPE_SRB) { if (sp->cmd_type != TYPE_SRB) {
req->outstanding_cmds[handle] = NULL; req->outstanding_cmds[handle] = NULL;
ql_dbg(ql_dbg_io, vha, 0x3015, ql_dbg(ql_dbg_io, vha, 0x3015,
......
...@@ -224,8 +224,8 @@ static void qla_nvme_abort_work(struct work_struct *work) ...@@ -224,8 +224,8 @@ static void qla_nvme_abort_work(struct work_struct *work)
if (ha->flags.host_shutting_down) { if (ha->flags.host_shutting_down) {
ql_log(ql_log_info, sp->fcport->vha, 0xffff, ql_log(ql_log_info, sp->fcport->vha, 0xffff,
"%s Calling done on sp: %p, type: 0x%x, sp->ref_count: 0x%x\n", "%s Calling done on sp: %p, type: 0x%x\n",
__func__, sp, sp->type, atomic_read(&sp->ref_count)); __func__, sp, sp->type);
sp->done(sp, 0); sp->done(sp, 0);
goto out; goto out;
} }
......
...@@ -698,11 +698,6 @@ void qla2x00_sp_compl(srb_t *sp, int res) ...@@ -698,11 +698,6 @@ void qla2x00_sp_compl(srb_t *sp, int res)
struct scsi_cmnd *cmd = GET_CMD_SP(sp); struct scsi_cmnd *cmd = GET_CMD_SP(sp);
struct completion *comp = sp->comp; struct completion *comp = sp->comp;
if (WARN_ON_ONCE(atomic_read(&sp->ref_count) == 0))
return;
atomic_dec(&sp->ref_count);
sp->free(sp); sp->free(sp);
cmd->result = res; cmd->result = res;
CMD_SP(cmd) = NULL; CMD_SP(cmd) = NULL;
...@@ -794,11 +789,6 @@ void qla2xxx_qpair_sp_compl(srb_t *sp, int res) ...@@ -794,11 +789,6 @@ void qla2xxx_qpair_sp_compl(srb_t *sp, int res)
struct scsi_cmnd *cmd = GET_CMD_SP(sp); struct scsi_cmnd *cmd = GET_CMD_SP(sp);
struct completion *comp = sp->comp; struct completion *comp = sp->comp;
if (WARN_ON_ONCE(atomic_read(&sp->ref_count) == 0))
return;
atomic_dec(&sp->ref_count);
sp->free(sp); sp->free(sp);
cmd->result = res; cmd->result = res;
CMD_SP(cmd) = NULL; CMD_SP(cmd) = NULL;
...@@ -903,7 +893,7 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) ...@@ -903,7 +893,7 @@ qla2xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
sp->u.scmd.cmd = cmd; sp->u.scmd.cmd = cmd;
sp->type = SRB_SCSI_CMD; sp->type = SRB_SCSI_CMD;
atomic_set(&sp->ref_count, 1);
CMD_SP(cmd) = (void *)sp; CMD_SP(cmd) = (void *)sp;
sp->free = qla2x00_sp_free_dma; sp->free = qla2x00_sp_free_dma;
sp->done = qla2x00_sp_compl; sp->done = qla2x00_sp_compl;
...@@ -985,11 +975,9 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd, ...@@ -985,11 +975,9 @@ qla2xxx_mqueuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd,
sp->u.scmd.cmd = cmd; sp->u.scmd.cmd = cmd;
sp->type = SRB_SCSI_CMD; sp->type = SRB_SCSI_CMD;
atomic_set(&sp->ref_count, 1);
CMD_SP(cmd) = (void *)sp; CMD_SP(cmd) = (void *)sp;
sp->free = qla2xxx_qpair_sp_free_dma; sp->free = qla2xxx_qpair_sp_free_dma;
sp->done = qla2xxx_qpair_sp_compl; sp->done = qla2xxx_qpair_sp_compl;
sp->qpair = qpair;
rval = ha->isp_ops->start_scsi_mq(sp); rval = ha->isp_ops->start_scsi_mq(sp);
if (rval != QLA_SUCCESS) { if (rval != QLA_SUCCESS) {
...@@ -1187,16 +1175,6 @@ qla2x00_wait_for_chip_reset(scsi_qla_host_t *vha) ...@@ -1187,16 +1175,6 @@ qla2x00_wait_for_chip_reset(scsi_qla_host_t *vha)
return return_status; return return_status;
} }
static int
sp_get(struct srb *sp)
{
if (!refcount_inc_not_zero((refcount_t *)&sp->ref_count))
/* kref get fail */
return ENXIO;
else
return 0;
}
#define ISP_REG_DISCONNECT 0xffffffffU #define ISP_REG_DISCONNECT 0xffffffffU
/************************************************************************** /**************************************************************************
* qla2x00_isp_reg_stat * qla2x00_isp_reg_stat
...@@ -1252,6 +1230,9 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) ...@@ -1252,6 +1230,9 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
uint64_t lun; uint64_t lun;
int rval; int rval;
struct qla_hw_data *ha = vha->hw; struct qla_hw_data *ha = vha->hw;
uint32_t ratov_j;
struct qla_qpair *qpair;
unsigned long flags;
if (qla2x00_isp_reg_stat(ha)) { if (qla2x00_isp_reg_stat(ha)) {
ql_log(ql_log_info, vha, 0x8042, ql_log(ql_log_info, vha, 0x8042,
...@@ -1264,13 +1245,26 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) ...@@ -1264,13 +1245,26 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
return ret; return ret;
sp = scsi_cmd_priv(cmd); sp = scsi_cmd_priv(cmd);
qpair = sp->qpair;
if (sp->fcport && sp->fcport->deleted) if ((sp->fcport && sp->fcport->deleted) || !qpair)
return SUCCESS; return SUCCESS;
/* Return if the command has already finished. */ spin_lock_irqsave(qpair->qp_lock_ptr, flags);
if (sp_get(sp)) if (sp->completed) {
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
return SUCCESS; return SUCCESS;
}
if (sp->abort || sp->aborted) {
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
return FAILED;
}
sp->abort = 1;
sp->comp = &comp;
spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
id = cmd->device->id; id = cmd->device->id;
lun = cmd->device->lun; lun = cmd->device->lun;
...@@ -1279,47 +1273,37 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) ...@@ -1279,47 +1273,37 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
"Aborting from RISC nexus=%ld:%d:%llu sp=%p cmd=%p handle=%x\n", "Aborting from RISC nexus=%ld:%d:%llu sp=%p cmd=%p handle=%x\n",
vha->host_no, id, lun, sp, cmd, sp->handle); vha->host_no, id, lun, sp, cmd, sp->handle);
/*
* Abort will release the original Command/sp from FW. Let the
* original command call scsi_done. In return, he will wakeup
* this sleeping thread.
*/
rval = ha->isp_ops->abort_command(sp); rval = ha->isp_ops->abort_command(sp);
ql_dbg(ql_dbg_taskm, vha, 0x8003, ql_dbg(ql_dbg_taskm, vha, 0x8003,
"Abort command mbx cmd=%p, rval=%x.\n", cmd, rval); "Abort command mbx cmd=%p, rval=%x.\n", cmd, rval);
/* Wait for the command completion. */
ratov_j = ha->r_a_tov/10 * 4 * 1000;
ratov_j = msecs_to_jiffies(ratov_j);
switch (rval) { switch (rval) {
case QLA_SUCCESS: case QLA_SUCCESS:
/*
* The command has been aborted. That means that the firmware
* won't report a completion.
*/
sp->done(sp, DID_ABORT << 16);
ret = SUCCESS;
break;
case QLA_FUNCTION_PARAMETER_ERROR: {
/* Wait for the command completion. */
uint32_t ratov = ha->r_a_tov/10;
uint32_t ratov_j = msecs_to_jiffies(4 * ratov * 1000);
WARN_ON_ONCE(sp->comp);
sp->comp = &comp;
if (!wait_for_completion_timeout(&comp, ratov_j)) { if (!wait_for_completion_timeout(&comp, ratov_j)) {
ql_dbg(ql_dbg_taskm, vha, 0xffff, ql_dbg(ql_dbg_taskm, vha, 0xffff,
"%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n", "%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n",
__func__, ha->r_a_tov); __func__, ha->r_a_tov/10);
ret = FAILED; ret = FAILED;
} else { } else {
ret = SUCCESS; ret = SUCCESS;
} }
break; break;
}
default: default:
/*
* Either abort failed or abort and completion raced. Let
* the SCSI core retry the abort in the former case.
*/
ret = FAILED; ret = FAILED;
break; break;
} }
sp->comp = NULL; sp->comp = NULL;
atomic_dec(&sp->ref_count);
ql_log(ql_log_info, vha, 0x801c, ql_log(ql_log_info, vha, 0x801c,
"Abort command issued nexus=%ld:%d:%llu -- %x.\n", "Abort command issued nexus=%ld:%d:%llu -- %x.\n",
vha->host_no, id, lun, ret); vha->host_no, id, lun, ret);
...@@ -1711,32 +1695,53 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res, ...@@ -1711,32 +1695,53 @@ static void qla2x00_abort_srb(struct qla_qpair *qp, srb_t *sp, const int res,
scsi_qla_host_t *vha = qp->vha; scsi_qla_host_t *vha = qp->vha;
struct qla_hw_data *ha = vha->hw; struct qla_hw_data *ha = vha->hw;
int rval; int rval;
bool ret_cmd;
uint32_t ratov_j;
if (sp_get(sp)) if (qla2x00_chip_is_down(vha)) {
sp->done(sp, res);
return; return;
}
if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS || if (sp->type == SRB_NVME_CMD || sp->type == SRB_NVME_LS ||
(sp->type == SRB_SCSI_CMD && !ha->flags.eeh_busy && (sp->type == SRB_SCSI_CMD && !ha->flags.eeh_busy &&
!test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) && !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
!qla2x00_isp_reg_stat(ha))) { !qla2x00_isp_reg_stat(ha))) {
if (sp->comp) {
sp->done(sp, res);
return;
}
sp->comp = &comp; sp->comp = &comp;
sp->abort = 1;
spin_unlock_irqrestore(qp->qp_lock_ptr, *flags); spin_unlock_irqrestore(qp->qp_lock_ptr, *flags);
rval = ha->isp_ops->abort_command(sp);
rval = ha->isp_ops->abort_command(sp);
/* Wait for command completion. */
ret_cmd = false;
ratov_j = ha->r_a_tov/10 * 4 * 1000;
ratov_j = msecs_to_jiffies(ratov_j);
switch (rval) { switch (rval) {
case QLA_SUCCESS: case QLA_SUCCESS:
sp->done(sp, res); if (wait_for_completion_timeout(&comp, ratov_j)) {
ql_dbg(ql_dbg_taskm, vha, 0xffff,
"%s: Abort wait timer (4 * R_A_TOV[%d]) expired\n",
__func__, ha->r_a_tov/10);
ret_cmd = true;
}
/* else FW return SP to driver */
break; break;
case QLA_FUNCTION_PARAMETER_ERROR: default:
wait_for_completion(&comp); ret_cmd = true;
break; break;
} }
spin_lock_irqsave(qp->qp_lock_ptr, *flags); spin_lock_irqsave(qp->qp_lock_ptr, *flags);
sp->comp = NULL; if (ret_cmd && (!sp->completed || !sp->aborted))
sp->done(sp, res);
} else {
sp->done(sp, res);
} }
atomic_dec(&sp->ref_count);
} }
static void static void
...@@ -1758,7 +1763,6 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res) ...@@ -1758,7 +1763,6 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
sp = req->outstanding_cmds[cnt]; sp = req->outstanding_cmds[cnt];
if (sp) { if (sp) {
req->outstanding_cmds[cnt] = NULL;
switch (sp->cmd_type) { switch (sp->cmd_type) {
case TYPE_SRB: case TYPE_SRB:
qla2x00_abort_srb(qp, sp, res, &flags); qla2x00_abort_srb(qp, sp, res, &flags);
...@@ -1780,6 +1784,7 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res) ...@@ -1780,6 +1784,7 @@ __qla2x00_abort_all_cmds(struct qla_qpair *qp, int res)
default: default:
break; break;
} }
req->outstanding_cmds[cnt] = NULL;
} }
} }
spin_unlock_irqrestore(qp->qp_lock_ptr, flags); spin_unlock_irqrestore(qp->qp_lock_ptr, flags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment