Commit e818a5b4 authored by Sagi Grimberg's avatar Sagi Grimberg Committed by Christoph Hellwig

nvme-rdma: fast fail incoming requests while we reconnect

When we encounter an transport/controller errors, error recovery
kicks in which performs:
1. stops io/admin queues
2. moves transport queues out of LIVE state
3. fast fail pending io
4. schedule periodic reconnects.

But we also need to fast fail incoming IO taht enters after we
already scheduled. Given that our queue is not LIVE anymore, simply
restart the request queues to fail in .queue_rq
Reported-by: default avatarAlex Turin <alex@vastdata.com>
Reported-by: default avatarshahar.salzman <shahar.salzman@gmail.com>
Signed-off-by: default avatarSagi Grimberg <sagi@grimberg.me>
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Cc: stable@vger.kernel.org
parent 82b057ca
...@@ -753,28 +753,26 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) ...@@ -753,28 +753,26 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
if (ret) if (ret)
goto requeue; goto requeue;
blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
ret = nvmf_connect_admin_queue(&ctrl->ctrl); ret = nvmf_connect_admin_queue(&ctrl->ctrl);
if (ret) if (ret)
goto stop_admin_q; goto requeue;
set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
if (ret) if (ret)
goto stop_admin_q; goto requeue;
nvme_start_keep_alive(&ctrl->ctrl); nvme_start_keep_alive(&ctrl->ctrl);
if (ctrl->queue_count > 1) { if (ctrl->queue_count > 1) {
ret = nvme_rdma_init_io_queues(ctrl); ret = nvme_rdma_init_io_queues(ctrl);
if (ret) if (ret)
goto stop_admin_q; goto requeue;
ret = nvme_rdma_connect_io_queues(ctrl); ret = nvme_rdma_connect_io_queues(ctrl);
if (ret) if (ret)
goto stop_admin_q; goto requeue;
} }
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
...@@ -782,7 +780,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) ...@@ -782,7 +780,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
ctrl->ctrl.opts->nr_reconnects = 0; ctrl->ctrl.opts->nr_reconnects = 0;
if (ctrl->queue_count > 1) { if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl);
nvme_queue_scan(&ctrl->ctrl); nvme_queue_scan(&ctrl->ctrl);
nvme_queue_async_events(&ctrl->ctrl); nvme_queue_async_events(&ctrl->ctrl);
} }
...@@ -791,8 +788,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) ...@@ -791,8 +788,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
return; return;
stop_admin_q:
blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
requeue: requeue:
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
ctrl->ctrl.opts->nr_reconnects); ctrl->ctrl.opts->nr_reconnects);
...@@ -823,6 +818,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) ...@@ -823,6 +818,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_cancel_request, &ctrl->ctrl); nvme_cancel_request, &ctrl->ctrl);
/*
* queues are not a live anymore, so restart the queues to fail fast
* new IO
*/
blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
nvme_start_queues(&ctrl->ctrl);
nvme_rdma_reconnect_or_remove(ctrl); nvme_rdma_reconnect_or_remove(ctrl);
} }
...@@ -1433,7 +1435,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved) ...@@ -1433,7 +1435,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
/* /*
* We cannot accept any other command until the Connect command has completed. * We cannot accept any other command until the Connect command has completed.
*/ */
static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
struct request *rq) struct request *rq)
{ {
if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) { if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
...@@ -1441,11 +1443,22 @@ static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, ...@@ -1441,11 +1443,22 @@ static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
if (!blk_rq_is_passthrough(rq) || if (!blk_rq_is_passthrough(rq) ||
cmd->common.opcode != nvme_fabrics_command || cmd->common.opcode != nvme_fabrics_command ||
cmd->fabrics.fctype != nvme_fabrics_type_connect) cmd->fabrics.fctype != nvme_fabrics_type_connect) {
return false; /*
* reconnecting state means transport disruption, which
* can take a long time and even might fail permanently,
* so we can't let incoming I/O be requeued forever.
* fail it fast to allow upper layers a chance to
* failover.
*/
if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
return -EIO;
else
return -EAGAIN;
}
} }
return true; return 0;
} }
static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
...@@ -1463,8 +1476,9 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -1463,8 +1476,9 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
WARN_ON_ONCE(rq->tag < 0); WARN_ON_ONCE(rq->tag < 0);
if (!nvme_rdma_queue_is_ready(queue, rq)) ret = nvme_rdma_queue_is_ready(queue, rq);
return BLK_MQ_RQ_QUEUE_BUSY; if (unlikely(ret))
goto err;
dev = queue->device->dev; dev = queue->device->dev;
ib_dma_sync_single_for_cpu(dev, sqe->dma, ib_dma_sync_single_for_cpu(dev, sqe->dma,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment