Commit 85d0331a authored by Jens Axboe

Merge branch 'nvme-4.12' of git://git.infradead.org/nvme into for-linus

Christoph writes:

"A few NVMe fixes for 4.12-rc, PCIe reset fixes and APST fixes, a
 RDMA reconnect fix, two FC fixes and a general controller removal fix."
parents 64604957 9947d6a0
drivers/nvme/host/core.c

@@ -56,7 +56,7 @@ MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
 static int nvme_char_major;
 module_param(nvme_char_major, int, 0);
 
-static unsigned long default_ps_max_latency_us = 25000;
+static unsigned long default_ps_max_latency_us = 100000;
 module_param(default_ps_max_latency_us, ulong, 0644);
 MODULE_PARM_DESC(default_ps_max_latency_us,
 		 "max power saving latency for new devices; use PM QOS to change per device");
@@ -1342,7 +1342,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 	 * transitioning between power states.  Therefore, when running
 	 * in any given state, we will enter the next lower-power
 	 * non-operational state after waiting 50 * (enlat + exlat)
-	 * microseconds, as long as that state's total latency is under
+	 * microseconds, as long as that state's exit latency is under
 	 * the requested maximum latency.
 	 *
 	 * We will not autonomously enter any non-operational state for
@@ -1387,7 +1387,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 	 * lowest-power state, not the number of states.
 	 */
 	for (state = (int)ctrl->npss; state >= 0; state--) {
-		u64 total_latency_us, transition_ms;
+		u64 total_latency_us, exit_latency_us, transition_ms;
 
 		if (target)
 			table->entries[state] = target;
@@ -1408,12 +1408,15 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl)
 		if (!(ctrl->psd[state].flags &
 				NVME_PS_FLAGS_NON_OP_STATE))
 			continue;
 
-		total_latency_us =
-			(u64)le32_to_cpu(ctrl->psd[state].entry_lat) +
-			+ le32_to_cpu(ctrl->psd[state].exit_lat);
-		if (total_latency_us > ctrl->ps_max_latency_us)
+		exit_latency_us =
+			(u64)le32_to_cpu(ctrl->psd[state].exit_lat);
+		if (exit_latency_us > ctrl->ps_max_latency_us)
 			continue;
 
+		total_latency_us =
+			exit_latency_us +
+			le32_to_cpu(ctrl->psd[state].entry_lat);
+
 		/*
 		 * This state is good.  Use it as the APST idle
 		 * target for higher power states.
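Taken together, the three core.c hunks above change how nvme_configure_apst() filters power states: a non-operational state is now rejected only if its exit latency exceeds the configured tolerance, while the entry latency still feeds the 50 * (enlat + exlat) idle-time heuristic. A simplified sketch of the per-state loop after the change, pieced together from the lines visible above (the tail of the loop, which programs the table entry and transition time, is elided):

	for (state = (int)ctrl->npss; state >= 0; state--) {
		u64 total_latency_us, exit_latency_us, transition_ms;

		if (target)
			table->entries[state] = target;

		/* Only non-operational states are APST candidates. */
		if (!(ctrl->psd[state].flags & NVME_PS_FLAGS_NON_OP_STATE))
			continue;

		/* A state is unusable only if waking from it is too slow;
		 * how long it takes to enter it no longer disqualifies it. */
		exit_latency_us = (u64)le32_to_cpu(ctrl->psd[state].exit_lat);
		if (exit_latency_us > ctrl->ps_max_latency_us)
			continue;

		/* Entry latency still counts toward the idle-time heuristic. */
		total_latency_us = exit_latency_us +
				   le32_to_cpu(ctrl->psd[state].entry_lat);

		/* ... this state becomes the APST target for the shallower
		 * states; the idle time before transitioning is derived from
		 * 50 * total_latency_us ... */
	}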
@@ -2438,6 +2441,10 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 	struct nvme_ns *ns;
 
 	mutex_lock(&ctrl->namespaces_mutex);
+
+	/* Forcibly start all queues to avoid having stuck requests */
+	blk_mq_start_hw_queues(ctrl->admin_q);
+
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		/*
 		 * Revalidating a dead namespace sets capacity to 0. This will
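nvme_kill_queues() runs when a controller is being torn down as dead. If an earlier disable left the admin hardware queue stopped, admin commands already queued on it could never be dispatched or failed, and removal would hang waiting for them; force-starting the admin queue first lets those requests reach the driver and complete with an error. The function now has roughly this shape (sketch; the per-namespace body is abridged):

	void nvme_kill_queues(struct nvme_ctrl *ctrl)
	{
		struct nvme_ns *ns;

		mutex_lock(&ctrl->namespaces_mutex);

		/* Forcibly start all queues to avoid having stuck requests */
		blk_mq_start_hw_queues(ctrl->admin_q);

		list_for_each_entry(ns, &ctrl->namespaces, list) {
			/* ... revalidate the (now zero-capacity) namespace,
			 * mark its request queue dying and unstick it the same
			 * way, so pending I/O is failed rather than blocked ... */
		}
		mutex_unlock(&ctrl->namespaces_mutex);
	}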
drivers/nvme/host/fc.c

@@ -1139,6 +1139,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
 /* *********************** NVME Ctrl Routines **************************** */
 
 static void __nvme_fc_final_op_cleanup(struct request *rq);
+static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
 
 static int
 nvme_fc_reinit_request(void *data, struct request *rq)
@@ -1265,7 +1266,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	struct nvme_command *sqe = &op->cmd_iu.sqe;
 	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
 	union nvme_result result;
-	bool complete_rq;
+	bool complete_rq, terminate_assoc = true;
 
 	/*
 	 * WARNING:
@@ -1294,6 +1295,14 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 	 * fabricate a CQE, the following fields will not be set as they
 	 * are not referenced:
 	 *      cqe.sqid,  cqe.sqhd,  cqe.command_id
+	 *
+	 * Failure or error of an individual i/o, in a transport
+	 * detected fashion unrelated to the nvme completion status,
+	 * potentially cause the initiator and target sides to get out
+	 * of sync on SQ head/tail (aka outstanding io count allowed).
+	 * Per FC-NVME spec, failure of an individual command requires
+	 * the connection to be terminated, which in turn requires the
+	 * association to be terminated.
 	 */
 
 	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
@@ -1359,6 +1368,8 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 		goto done;
 	}
 
+	terminate_assoc = false;
+
 done:
 	if (op->flags & FCOP_FLAGS_AEN) {
 		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
@@ -1366,7 +1377,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 		atomic_set(&op->state, FCPOP_STATE_IDLE);
 		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
 		nvme_fc_ctrl_put(ctrl);
-		return;
+		goto check_error;
 	}
 
 	complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
@@ -1379,6 +1390,10 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
 		nvme_end_request(rq, status, result);
 	} else
 		__nvme_fc_final_op_cleanup(rq);
+
+check_error:
+	if (terminate_assoc)
+		nvme_fc_error_recovery(ctrl, "transport detected io error");
 }
 
 static int
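Taken together, the hunks above give nvme_fc_fcpio_done() a "terminate unless proven good" structure: terminate_assoc starts as true, is cleared only once the completion has been fully validated, and every exit path now funnels through the new check_error label, where a transport-detected error escalates to nvme_fc_error_recovery() and, per FC-NVME, association teardown. A heavily abridged skeleton of the resulting flow (declarations and the validation body are omitted):

	static void
	nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
	{
		bool complete_rq, terminate_assoc = true;

		/* ... validate the transport response and fabricate the CQE;
		 * any inconsistency sets an error status and jumps to done
		 * while terminate_assoc is still true ... */

		terminate_assoc = false;	/* completion fully validated */

	done:
		if (op->flags & FCOP_FLAGS_AEN) {
			nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
			atomic_set(&op->state, FCPOP_STATE_IDLE);
			op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
			nvme_fc_ctrl_put(ctrl);
			goto check_error;
		}

		/* ... otherwise complete the request via nvme_end_request()
		 * or __nvme_fc_final_op_cleanup() ... */

	check_error:
		if (terminate_assoc)
			nvme_fc_error_recovery(ctrl, "transport detected io error");
	}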
@@ -2791,6 +2806,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	ctrl->ctrl.opts = NULL;
 	/* initiate nvme ctrl ref counting teardown */
 	nvme_uninit_ctrl(&ctrl->ctrl);
+	nvme_put_ctrl(&ctrl->ctrl);
 
 	/* as we're past the point where we transition to the ref
 	 * counting teardown path, if we return a bad pointer here,
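This hunk fixes a leak on the controller-create failure path: nvme_uninit_ctrl() alone only unwinds initialization, so without a matching nvme_put_ctrl() the reference taken when the controller was allocated was never dropped and the failed controller stayed pinned. The idiom, in sketch form (the surrounding cleanup and the final error return are abridged, not quoted):

	/* ... unwind partial setup ... */
	ctrl->ctrl.opts = NULL;

	/* initiate nvme ctrl ref counting teardown */
	nvme_uninit_ctrl(&ctrl->ctrl);
	nvme_put_ctrl(&ctrl->ctrl);	/* drop the allocation reference */

	/* From here on the controller is freed by refcounting; the caller
	 * only gets an error pointer and must not touch it again. */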
drivers/nvme/host/pci.c

@@ -1367,7 +1367,7 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
 	bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);
 
 	/* If there is a reset ongoing, we shouldn't reset again. */
-	if (work_busy(&dev->reset_work))
+	if (dev->ctrl.state == NVME_CTRL_RESETTING)
 		return false;
 
 	/* We shouldn't reset unless the controller is on fatal error state
@@ -1903,7 +1903,7 @@ static void nvme_reset_work(struct work_struct *work)
 	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
 	int result = -ENODEV;
 
-	if (WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING))
+	if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING))
 		goto out;
 
 	/*
@@ -1913,9 +1913,6 @@ static void nvme_reset_work(struct work_struct *work)
 	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
 		nvme_dev_disable(dev, false);
 
-	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
-		goto out;
-
 	result = nvme_pci_enable(dev);
 	if (result)
 		goto out;
@@ -2009,8 +2006,8 @@ static int nvme_reset(struct nvme_dev *dev)
 {
 	if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
 		return -ENODEV;
-	if (work_busy(&dev->reset_work))
-		return -ENODEV;
+	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
+		return -EBUSY;
 	if (!queue_work(nvme_workq, &dev->reset_work))
 		return -EBUSY;
 	return 0;
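The PCIe hunks replace work_busy() checks, which race with the work item as it starts or finishes, with the controller state machine as the single serialization point: nvme_reset() only queues reset_work after winning the transition to NVME_CTRL_RESETTING, nvme_reset_work() now warns if it ever runs outside that state, and nvme_should_reset() consults the same state. The resulting entry point, essentially as shown in the hunk above, with comments added:

	static int nvme_reset(struct nvme_dev *dev)
	{
		if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
			return -ENODEV;

		/* The state transition is the gate: it succeeds for exactly
		 * one caller at a time, so a second reset can no longer be
		 * scheduled while one is pending or running. */
		if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
			return -EBUSY;

		if (!queue_work(nvme_workq, &dev->reset_work))
			return -EBUSY;

		return 0;
	}

Because the state is now set before the work runs, nvme_probe() has to perform the RESETTING transition itself before queueing the initial reset (next hunk), and nvme_reset_work() drops its internal transition.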
@@ -2136,6 +2133,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto release_pools;
 
+	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING);
 	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
 
 	queue_work(nvme_workq, &dev->reset_work);
@@ -2179,6 +2177,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
 
+	cancel_work_sync(&dev->reset_work);
 	pci_set_drvdata(pdev, NULL);
 
 	if (!pci_device_is_present(pdev)) {
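In nvme_remove() the ordering now matters: moving the controller to NVME_CTRL_DELETING first means the RESETTING transition (and therefore nvme_reset()) can no longer succeed, and the added cancel_work_sync() then waits out any reset that is already running before the device is torn down, closing the window in which a reset could be scheduled against a half-removed device. A minimal ordering sketch (the tail of the function is abridged):

	static void nvme_remove(struct pci_dev *pdev)
	{
		struct nvme_dev *dev = pci_get_drvdata(pdev);

		/* 1) Block future resets: once DELETING, the transition to
		 *    RESETTING fails and nvme_reset() returns -EBUSY. */
		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);

		/* 2) Wait for a reset that may already be in flight. */
		cancel_work_sync(&dev->reset_work);

		pci_set_drvdata(pdev, NULL);

		/* ... disable the controller and release its resources ... */
	}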
drivers/nvme/host/rdma.c

@@ -753,28 +753,26 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	if (ret)
 		goto requeue;
 
-	blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
-
 	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
 	if (ret)
-		goto stop_admin_q;
+		goto requeue;
 
 	set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags);
 
 	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
 	if (ret)
-		goto stop_admin_q;
+		goto requeue;
 
 	nvme_start_keep_alive(&ctrl->ctrl);
 
 	if (ctrl->queue_count > 1) {
 		ret = nvme_rdma_init_io_queues(ctrl);
 		if (ret)
-			goto stop_admin_q;
+			goto requeue;
 
 		ret = nvme_rdma_connect_io_queues(ctrl);
 		if (ret)
-			goto stop_admin_q;
+			goto requeue;
 	}
 
 	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -782,7 +780,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	ctrl->ctrl.opts->nr_reconnects = 0;
 
 	if (ctrl->queue_count > 1) {
-		nvme_start_queues(&ctrl->ctrl);
 		nvme_queue_scan(&ctrl->ctrl);
 		nvme_queue_async_events(&ctrl->ctrl);
 	}
@@ -791,8 +788,6 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
 	return;
 
-stop_admin_q:
-	blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
 requeue:
 	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
 			ctrl->ctrl.opts->nr_reconnects);
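With error recovery (next hunk) leaving the queues running, the reconnect worker no longer needs the stop_admin_q unwinding: every failure now simply lands on requeue, which logs the attempt and lets nvme_rdma_reconnect_or_remove() decide between scheduling another attempt and deleting the controller. Sketch of the failure tail (the call after the dev_info() is inferred from the error-recovery hunk below, not quoted from this one):

	requeue:
		dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
				ctrl->ctrl.opts->nr_reconnects);
		nvme_rdma_reconnect_or_remove(ctrl);	/* retry later or give up */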
@@ -823,6 +818,13 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_cancel_request, &ctrl->ctrl);
 
+	/*
+	 * queues are not a live anymore, so restart the queues to fail fast
+	 * new IO
+	 */
+	blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true);
+	nvme_start_queues(&ctrl->ctrl);
+
 	nvme_rdma_reconnect_or_remove(ctrl);
 }
@@ -1433,7 +1435,7 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
 /*
  * We cannot accept any other command until the Connect command has completed.
  */
-static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
+static inline int nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
 		struct request *rq)
 {
 	if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
@@ -1441,11 +1443,22 @@ static inline bool nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue,
 		if (!blk_rq_is_passthrough(rq) ||
 		    cmd->common.opcode != nvme_fabrics_command ||
-		    cmd->fabrics.fctype != nvme_fabrics_type_connect)
-			return false;
+		    cmd->fabrics.fctype != nvme_fabrics_type_connect) {
+			/*
+			 * reconnecting state means transport disruption, which
+			 * can take a long time and even might fail permanently,
+			 * so we can't let incoming I/O be requeued forever.
+			 * fail it fast to allow upper layers a chance to
+			 * failover.
+			 */
+			if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING)
+				return -EIO;
+			else
+				return -EAGAIN;
+		}
 	}
 
-	return true;
+	return 0;
 }
 
 static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -1463,8 +1476,9 @@ static int nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	WARN_ON_ONCE(rq->tag < 0);
 
-	if (!nvme_rdma_queue_is_ready(queue, rq))
-		return BLK_MQ_RQ_QUEUE_BUSY;
+	ret = nvme_rdma_queue_is_ready(queue, rq);
+	if (unlikely(ret))
+		goto err;
 
 	dev = queue->device->dev;
 	ib_dma_sync_single_for_cpu(dev, sqe->dma,
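nvme_rdma_queue_is_ready() changes from bool to an errno precisely so the caller can tell "retry later" from "fail now": 0 means dispatch, -EAGAIN keeps the old busy/requeue behaviour for a queue that is not yet live, and -EIO is returned while the controller is RECONNECTING so that I/O fails fast and upper layers (multipath, for instance) get a chance to fail over; that is also why the error-recovery hunk above restarts the stopped queues. The err label of nvme_rdma_queue_rq() is outside the hunk shown; a sketch of how such a return value is plausibly mapped onto the pre-4.13 blk-mq status codes (the exact label body is an assumption, not quoted from the patch):

	ret = nvme_rdma_queue_is_ready(queue, rq);
	if (unlikely(ret))
		goto err;

	/* ... map the data and post the RDMA send ... */

err:
	/* -EAGAIN (and -ENOMEM from the mapping path) ask blk-mq to back
	 * off and retry; anything else, such as the -EIO returned while
	 * reconnecting, fails the request immediately. */
	return (ret == -ENOMEM || ret == -EAGAIN) ?
			BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR;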