Commit 0a6fdbde authored by Bart Van Assche, committed by Doug Ledford

IB/srp: Fix race conditions related to task management

Prevent srp_process_rsp() from overwriting the status information
in ch if the SRP target response timed out and processing of
another task management function has already started. Prevent
concurrently issued task management functions from triggering
list corruption. This patch prevents the following stack
trace from appearing in the system log:

WARNING: CPU: 8 PID: 9269 at lib/list_debug.c:52 __list_del_entry_valid+0xbc/0xc0
list_del corruption. prev->next should be ffffc90004bb7b00, but was ffff8804052ecc68
CPU: 8 PID: 9269 Comm: sg_reset Tainted: G        W       4.10.0-rc7-dbg+ #3
Call Trace:
 dump_stack+0x68/0x93
 __warn+0xc6/0xe0
 warn_slowpath_fmt+0x4a/0x50
 __list_del_entry_valid+0xbc/0xc0
 wait_for_completion_timeout+0x12e/0x170
 srp_send_tsk_mgmt+0x1ef/0x2d0 [ib_srp]
 srp_reset_device+0x5b/0x110 [ib_srp]
 scsi_ioctl_reset+0x1c7/0x290
 scsi_ioctl+0x12a/0x420
 sd_ioctl+0x9d/0x100
 blkdev_ioctl+0x51e/0x9f0
 block_ioctl+0x38/0x40
 do_vfs_ioctl+0x8f/0x700
 SyS_ioctl+0x3c/0x70
 entry_SYSCALL_64_fastpath+0x18/0xad
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Israel Rukshin <israelr@mellanox.com>
Cc: Max Gurtovoy <maxg@mellanox.com>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: Steve Feeley <Steve.Feeley@sandisk.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent 6cb72bc1
...@@ -1884,12 +1884,17 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) ...@@ -1884,12 +1884,17 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) { if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
spin_lock_irqsave(&ch->lock, flags); spin_lock_irqsave(&ch->lock, flags);
ch->req_lim += be32_to_cpu(rsp->req_lim_delta); ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
spin_unlock_irqrestore(&ch->lock, flags); if (rsp->tag == ch->tsk_mgmt_tag) {
ch->tsk_mgmt_status = -1; ch->tsk_mgmt_status = -1;
if (be32_to_cpu(rsp->resp_data_len) >= 4) if (be32_to_cpu(rsp->resp_data_len) >= 4)
ch->tsk_mgmt_status = rsp->data[3]; ch->tsk_mgmt_status = rsp->data[3];
complete(&ch->tsk_mgmt_done); complete(&ch->tsk_mgmt_done);
} else {
shost_printk(KERN_ERR, target->scsi_host,
"Received tsk mgmt response too late for tag %#llx\n",
rsp->tag);
}
spin_unlock_irqrestore(&ch->lock, flags);
} else { } else {
scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag); scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
if (scmnd && scmnd->host_scribble) { if (scmnd && scmnd->host_scribble) {
...@@ -2528,19 +2533,18 @@ srp_change_queue_depth(struct scsi_device *sdev, int qdepth) ...@@ -2528,19 +2533,18 @@ srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
} }
static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
u8 func) u8 func, u8 *status)
{ {
struct srp_target_port *target = ch->target; struct srp_target_port *target = ch->target;
struct srp_rport *rport = target->rport; struct srp_rport *rport = target->rport;
struct ib_device *dev = target->srp_host->srp_dev->dev; struct ib_device *dev = target->srp_host->srp_dev->dev;
struct srp_iu *iu; struct srp_iu *iu;
struct srp_tsk_mgmt *tsk_mgmt; struct srp_tsk_mgmt *tsk_mgmt;
int res;
if (!ch->connected || target->qp_in_error) if (!ch->connected || target->qp_in_error)
return -1; return -1;
init_completion(&ch->tsk_mgmt_done);
/* /*
* Lock the rport mutex to avoid that srp_create_ch_ib() is * Lock the rport mutex to avoid that srp_create_ch_ib() is
* invoked while a task management function is being sent. * invoked while a task management function is being sent.
...@@ -2563,10 +2567,16 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, ...@@ -2563,10 +2567,16 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
tsk_mgmt->opcode = SRP_TSK_MGMT; tsk_mgmt->opcode = SRP_TSK_MGMT;
int_to_scsilun(lun, &tsk_mgmt->lun); int_to_scsilun(lun, &tsk_mgmt->lun);
tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
tsk_mgmt->tsk_mgmt_func = func; tsk_mgmt->tsk_mgmt_func = func;
tsk_mgmt->task_tag = req_tag; tsk_mgmt->task_tag = req_tag;
spin_lock_irq(&ch->lock);
ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
tsk_mgmt->tag = ch->tsk_mgmt_tag;
spin_unlock_irq(&ch->lock);
init_completion(&ch->tsk_mgmt_done);
ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt, ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
DMA_TO_DEVICE); DMA_TO_DEVICE);
if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) { if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
...@@ -2575,13 +2585,15 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, ...@@ -2575,13 +2585,15 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
return -1; return -1;
} }
res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
if (res > 0 && status)
*status = ch->tsk_mgmt_status;
mutex_unlock(&rport->mutex); mutex_unlock(&rport->mutex);
if (!wait_for_completion_timeout(&ch->tsk_mgmt_done, WARN_ON_ONCE(res < 0);
msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
return -1;
return 0; return res > 0 ? 0 : -1;
} }
static int srp_abort(struct scsi_cmnd *scmnd) static int srp_abort(struct scsi_cmnd *scmnd)
...@@ -2607,7 +2619,7 @@ static int srp_abort(struct scsi_cmnd *scmnd) ...@@ -2607,7 +2619,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
shost_printk(KERN_ERR, target->scsi_host, shost_printk(KERN_ERR, target->scsi_host,
"Sending SRP abort for tag %#x\n", tag); "Sending SRP abort for tag %#x\n", tag);
if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun, if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
SRP_TSK_ABORT_TASK) == 0) SRP_TSK_ABORT_TASK, NULL) == 0)
ret = SUCCESS; ret = SUCCESS;
else if (target->rport->state == SRP_RPORT_LOST) else if (target->rport->state == SRP_RPORT_LOST)
ret = FAST_IO_FAIL; ret = FAST_IO_FAIL;
...@@ -2625,14 +2637,15 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) ...@@ -2625,14 +2637,15 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
struct srp_target_port *target = host_to_target(scmnd->device->host); struct srp_target_port *target = host_to_target(scmnd->device->host);
struct srp_rdma_ch *ch; struct srp_rdma_ch *ch;
int i; int i;
u8 status;
shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
ch = &target->ch[0]; ch = &target->ch[0];
if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun, if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
SRP_TSK_LUN_RESET)) SRP_TSK_LUN_RESET, &status))
return FAILED; return FAILED;
if (ch->tsk_mgmt_status) if (status)
return FAILED; return FAILED;
for (i = 0; i < target->ch_count; i++) { for (i = 0; i < target->ch_count; i++) {
......
...@@ -163,6 +163,7 @@ struct srp_rdma_ch { ...@@ -163,6 +163,7 @@ struct srp_rdma_ch {
int max_ti_iu_len; int max_ti_iu_len;
int comp_vector; int comp_vector;
u64 tsk_mgmt_tag;
struct completion tsk_mgmt_done; struct completion tsk_mgmt_done;
u8 tsk_mgmt_status; u8 tsk_mgmt_status;
bool connected; bool connected;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment