Commit 9c38475c authored by Jens Axboe

Merge tag 'nvme-5.13-2021-05-05' of git://git.infradead.org/nvme into block-5.13

Pull NVMe fixes from Christoph:

"nvme updates for Linux 5.13

 - reset the bdev to ns head when failover (Daniel Wagner)
 - remove unsupported command noise (Keith Busch)
 - misc passthrough improvements (Kanchan Joshi)
 - fix controller ioctl through ns_head (Minwoo Im)
 - fix controller timeouts during reset (Tao Chiu)"

* tag 'nvme-5.13-2021-05-05' of git://git.infradead.org/nvme:
  nvmet: remove unsupported command noise
  nvme-multipath: reset bdev to ns head when failover
  nvme-pci: fix controller reset hang when racing with nvme_timeout
  nvme: move the fabrics queue ready check routines to core
  nvme: avoid memset for passthrough requests
  nvme: add nvme_get_ns helper
  nvme: fix controller ioctl through ns_head
parents cd2c7545 4a203425
drivers/nvme/host/core.c
@@ -576,6 +576,11 @@ static void nvme_free_ns(struct kref *kref)
 	kfree(ns);
 }
 
+static inline bool nvme_get_ns(struct nvme_ns *ns)
+{
+	return kref_get_unless_zero(&ns->kref);
+}
+
 void nvme_put_ns(struct nvme_ns *ns)
 {
 	kref_put(&ns->kref, nvme_free_ns);
@@ -584,9 +589,6 @@ EXPORT_SYMBOL_NS_GPL(nvme_put_ns, NVME_TARGET_PASSTHRU);
 static inline void nvme_clear_nvme_request(struct request *req)
 {
-	struct nvme_command *cmd = nvme_req(req)->cmd;
-
-	memset(cmd, 0, sizeof(*cmd));
 	nvme_req(req)->retries = 0;
 	nvme_req(req)->flags = 0;
 	req->rq_flags |= RQF_DONTPREP;
@@ -637,6 +639,66 @@ static struct request *nvme_alloc_request_qid(struct request_queue *q,
 	return req;
 }
 
+/*
+ * For commands we are not in a state to send to the device, the default
+ * action is to busy them and retry after the controller state recovers.
+ * However, if the controller is deleting, or if anything is marked failfast
+ * or nvme-multipath, they are failed immediately.
+ *
+ * Note: commands used to initialize the controller will be marked failfast.
+ * Note: nvme cli/ioctl commands are marked failfast.
+ */
+blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
+		struct request *rq)
+{
+	if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
+	    ctrl->state != NVME_CTRL_DEAD &&
+	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
+	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
+		return BLK_STS_RESOURCE;
+	return nvme_host_path_error(rq);
+}
+EXPORT_SYMBOL_GPL(nvme_fail_nonready_command);
+
+bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+		bool queue_live)
+{
+	struct nvme_request *req = nvme_req(rq);
+
+	/*
+	 * We cannot currently send passthrough commands on the admin_q while
+	 * the controller is not LIVE, because we cannot guarantee that they
+	 * are issued after the admin connect, controller enable and/or other
+	 * commands in the initialization sequence.  Until the controller is
+	 * LIVE, fail with BLK_STS_RESOURCE so that they are rescheduled.
+	 */
+	if (rq->q == ctrl->admin_q && (req->flags & NVME_REQ_USERCMD))
+		return false;
+
+	if (ctrl->ops->flags & NVME_F_FABRICS) {
+		/*
+		 * Only allow commands on a live queue, except for the connect
+		 * command, which is required to set the queue live in the
+		 * appropriate states.
+		 */
+		switch (ctrl->state) {
+		case NVME_CTRL_CONNECTING:
+			if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
+			    req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
+				return true;
+			break;
+		default:
+			break;
+		case NVME_CTRL_DEAD:
+			return false;
+		}
+	}
+
+	return queue_live;
+}
+EXPORT_SYMBOL_GPL(__nvme_check_ready);
+
 static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
 {
 	struct nvme_command c;
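The ready-check helpers moved above are what a driver's queue_rq path calls before building a command. For illustration only, a minimal sketch of that calling pattern; the driver glue here is hypothetical (example_queue_rq and queue_ready are stand-ins), and only nvme_check_ready(), nvme_fail_nonready_command() and nvme_setup_cmd() are real interfaces from this series:

static blk_status_t example_queue_rq(struct nvme_ctrl *ctrl,
		struct nvme_ns *ns, struct request *rq, bool queue_ready)
{
	blk_status_t ret;

	/*
	 * LIVE controllers pass the inline fast path; anything else goes
	 * through __nvme_check_ready(), and non-ready requests are either
	 * requeued (BLK_STS_RESOURCE) or failed via the host path error.
	 */
	if (!nvme_check_ready(ctrl, rq, queue_ready))
		return nvme_fail_nonready_command(ctrl, rq);

	ret = nvme_setup_cmd(ns, rq);
	if (ret)
		return ret;

	/* transport-specific data mapping and submission would follow */
	return BLK_STS_OK;
}

The PCIe, FC, RDMA, TCP and loop call sites later in this diff follow exactly this shape.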
@@ -898,8 +960,10 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
 	struct nvme_command *cmd = nvme_req(req)->cmd;
 	blk_status_t ret = BLK_STS_OK;
 
-	if (!(req->rq_flags & RQF_DONTPREP))
+	if (!(req->rq_flags & RQF_DONTPREP)) {
 		nvme_clear_nvme_request(req);
+		memset(cmd, 0, sizeof(*cmd));
+	}
 
 	switch (req_op(req)) {
 	case REQ_OP_DRV_IN:
@@ -1494,7 +1558,7 @@ static int nvme_ns_open(struct nvme_ns *ns)
 	/* should never be called due to GENHD_FL_HIDDEN */
 	if (WARN_ON_ONCE(nvme_ns_head_multipath(ns->head)))
 		goto fail;
-	if (!kref_get_unless_zero(&ns->kref))
+	if (!nvme_get_ns(ns))
 		goto fail;
 	if (!try_module_get(ns->ctrl->ops->module))
 		goto fail_put_ns;
@@ -1999,28 +2063,6 @@ static const struct block_device_operations nvme_bdev_ops = {
 	.pr_ops		= &nvme_pr_ops,
 };
 
-#ifdef CONFIG_NVME_MULTIPATH
-struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys)
-{
-	struct nvme_ctrl *ctrl;
-	int ret;
-
-	ret = mutex_lock_killable(&nvme_subsystems_lock);
-	if (ret)
-		return ERR_PTR(ret);
-	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
-		if (ctrl->state == NVME_CTRL_LIVE)
-			goto found;
-	}
-	mutex_unlock(&nvme_subsystems_lock);
-	return ERR_PTR(-EWOULDBLOCK);
-found:
-	nvme_get_ctrl(ctrl);
-	mutex_unlock(&nvme_subsystems_lock);
-	return ctrl;
-}
-#endif /* CONFIG_NVME_MULTIPATH */
-
 static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
 {
 	unsigned long timeout =
@@ -3604,7 +3646,7 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	down_read(&ctrl->namespaces_rwsem);
 	list_for_each_entry(ns, &ctrl->namespaces, list) {
 		if (ns->head->ns_id == nsid) {
-			if (!kref_get_unless_zero(&ns->kref))
+			if (!nvme_get_ns(ns))
				continue;
 			ret = ns;
 			break;
drivers/nvme/host/fabrics.c
@@ -533,63 +533,6 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
 	return NULL;
 }
 
-/*
- * For commands we are not in a state to send to the device, the default
- * action is to busy them and retry after the controller state recovers.
- * However, if the controller is deleting, or if anything is marked failfast
- * or nvme-multipath, they are failed immediately.
- *
- * Note: commands used to initialize the controller will be marked failfast.
- * Note: nvme cli/ioctl commands are marked failfast.
- */
-blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
-		struct request *rq)
-{
-	if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
-	    ctrl->state != NVME_CTRL_DEAD &&
-	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
-	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
-		return BLK_STS_RESOURCE;
-	return nvme_host_path_error(rq);
-}
-EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command);
-
-bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
-		bool queue_live)
-{
-	struct nvme_request *req = nvme_req(rq);
-
-	/*
-	 * We cannot currently send passthrough commands on the admin_q while
-	 * the controller is not LIVE, because we cannot guarantee that they
-	 * are issued after the admin connect, controller enable and/or other
-	 * commands in the initialization sequence.  Until the controller is
-	 * LIVE, fail with BLK_STS_RESOURCE so that they are rescheduled.
-	 */
-	if (rq->q == ctrl->admin_q && (req->flags & NVME_REQ_USERCMD))
-		return false;
-
-	/*
-	 * Only allow commands on a live queue, except for the connect command,
-	 * which is required to set the queue live in the appropriate states.
-	 */
-	switch (ctrl->state) {
-	case NVME_CTRL_CONNECTING:
-		if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
-		    req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
-			return true;
-		break;
-	default:
-		break;
-	case NVME_CTRL_DEAD:
-		return false;
-	}
-
-	return queue_live;
-}
-EXPORT_SYMBOL_GPL(__nvmf_check_ready);
-
 static const match_table_t opt_tokens = {
 	{ NVMF_OPT_TRANSPORT,		"transport=%s" },
 	{ NVMF_OPT_TRADDR,		"traddr=%s" },
drivers/nvme/host/fabrics.h
@@ -184,20 +184,7 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
 void nvmf_free_options(struct nvmf_ctrl_options *opts);
 int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
-blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
-		struct request *rq);
-bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
-		bool queue_live);
 bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
 		struct nvmf_ctrl_options *opts);
-
-static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
-		bool queue_live)
-{
-	if (likely(ctrl->state == NVME_CTRL_LIVE ||
-		   ctrl->state == NVME_CTRL_DELETING))
-		return true;
-	return __nvmf_check_ready(ctrl, rq, queue_live);
-}
 
 #endif /* _NVME_FABRICS_H */
drivers/nvme/host/fc.c
@@ -2766,8 +2766,8 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 	blk_status_t ret;
 
 	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
-	    !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
-		return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
+	    !nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
+		return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
 
 	ret = nvme_setup_cmd(ns, rq);
 	if (ret)
drivers/nvme/host/ioctl.c
@@ -370,41 +370,45 @@ long nvme_ns_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 }
 
 #ifdef CONFIG_NVME_MULTIPATH
-static int nvme_ns_head_ctrl_ioctl(struct nvme_ns_head *head,
-		unsigned int cmd, void __user *argp)
+static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
+		void __user *argp, struct nvme_ns_head *head, int srcu_idx)
 {
-	struct nvme_ctrl *ctrl = nvme_find_get_live_ctrl(head->subsys);
+	struct nvme_ctrl *ctrl = ns->ctrl;
 	int ret;
 
-	if (IS_ERR(ctrl))
-		return PTR_ERR(ctrl);
-	ret = nvme_ctrl_ioctl(ctrl, cmd, argp);
-	nvme_put_ctrl(ctrl);
-	return ret;
-}
+	nvme_get_ctrl(ns->ctrl);
+	nvme_put_ns_from_disk(head, srcu_idx);
+	ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
 
-static int nvme_ns_head_ns_ioctl(struct nvme_ns_head *head,
-		unsigned int cmd, void __user *argp)
-{
-	int srcu_idx = srcu_read_lock(&head->srcu);
-	struct nvme_ns *ns = nvme_find_path(head);
-	int ret = -EWOULDBLOCK;
-
-	if (ns)
-		ret = nvme_ns_ioctl(ns, cmd, argp);
-	srcu_read_unlock(&head->srcu, srcu_idx);
+	nvme_put_ctrl(ctrl);
 	return ret;
 }
 
 int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
 		unsigned int cmd, unsigned long arg)
 {
-	struct nvme_ns_head *head = bdev->bd_disk->private_data;
+	struct nvme_ns_head *head = NULL;
 	void __user *argp = (void __user *)arg;
+	struct nvme_ns *ns;
+	int srcu_idx, ret;
+
+	ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
+	if (unlikely(!ns))
+		return -EWOULDBLOCK;
 
 	/*
 	 * Handle ioctls that apply to the controller instead of the namespace
 	 * separately and drop the ns SRCU reference early.  This avoids a
 	 * deadlock when deleting namespaces using the passthrough interface.
 	 */
 	if (is_ctrl_ioctl(cmd))
-		return nvme_ns_head_ctrl_ioctl(head, cmd, argp);
-	return nvme_ns_head_ns_ioctl(head, cmd, argp);
+		ret = nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+	else {
+		ret = nvme_ns_ioctl(ns, cmd, argp);
+		nvme_put_ns_from_disk(head, srcu_idx);
+	}
+
+	return ret;
 }
 
 long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
@@ -414,10 +418,23 @@ long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
 	struct nvme_ns_head *head =
 		container_of(cdev, struct nvme_ns_head, cdev);
 	void __user *argp = (void __user *)arg;
+	struct nvme_ns *ns;
+	int srcu_idx, ret;
+
+	srcu_idx = srcu_read_lock(&head->srcu);
+	ns = nvme_find_path(head);
+	if (!ns) {
+		srcu_read_unlock(&head->srcu, srcu_idx);
+		return -EWOULDBLOCK;
+	}
 
 	if (is_ctrl_ioctl(cmd))
-		return nvme_ns_head_ctrl_ioctl(head, cmd, argp);
-	return nvme_ns_head_ns_ioctl(head, cmd, argp);
+		return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+
+	ret = nvme_ns_ioctl(ns, cmd, argp);
+	nvme_put_ns_from_disk(head, srcu_idx);
+
+	return ret;
 }
 
 #endif /* CONFIG_NVME_MULTIPATH */
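The rework above hinges on reference ordering: for a controller ioctl issued through the ns_head node, the controller is pinned and the ns/SRCU reference dropped before the ioctl runs, so a passthrough command that deletes namespaces can no longer deadlock on an SRCU reader held by its own caller. A condensed sketch of that ordering (illustrative only; it restates nvme_ns_head_ctrl_ioctl() from the hunk above with the steps annotated, error handling trimmed):

static int ctrl_ioctl_ordering_sketch(struct nvme_ns *ns,
		struct nvme_ns_head *head, int srcu_idx,
		unsigned int cmd, void __user *argp)
{
	struct nvme_ctrl *ctrl = ns->ctrl;
	int ret;

	nvme_get_ctrl(ctrl);			/* 1. pin the controller */
	nvme_put_ns_from_disk(head, srcu_idx);	/* 2. drop the SRCU reference early */
	ret = nvme_ctrl_ioctl(ctrl, cmd, argp);	/* 3. may now delete namespaces safely */
	nvme_put_ctrl(ctrl);			/* 4. release the controller */
	return ret;
}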
drivers/nvme/host/multipath.c
@@ -70,6 +70,7 @@ void nvme_failover_req(struct request *req)
 	struct nvme_ns *ns = req->q->queuedata;
 	u16 status = nvme_req(req)->status & 0x7ff;
 	unsigned long flags;
+	struct bio *bio;
 
 	nvme_mpath_clear_current_path(ns);
@@ -84,6 +85,8 @@ void nvme_failover_req(struct request *req)
 	}
 
 	spin_lock_irqsave(&ns->head->requeue_lock, flags);
+	for (bio = req->bio; bio; bio = bio->bi_next)
+		bio_set_dev(bio, ns->head->disk->part0);
 	blk_steal_bios(&ns->head->requeue_list, req);
 	spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
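The bio_set_dev() loop matters because the stolen bios are later resubmitted by the head's requeue worker through the ns_head gendisk; if they still pointed at the failed path's device, resubmission would not go through path selection again. For context, a sketch of the consuming side, paraphrased from this era's nvme_requeue_work() in multipath.c (an assumption about the surrounding tree, not part of this diff):

static void requeue_work_sketch(struct nvme_ns_head *head)
{
	struct bio *bio, *next;

	spin_lock_irq(&head->requeue_lock);
	next = bio_list_get(&head->requeue_list);
	spin_unlock_irq(&head->requeue_lock);

	while ((bio = next) != NULL) {
		next = bio->bi_next;
		bio->bi_next = NULL;
		/* re-enters the block layer via the ns_head disk set above */
		submit_bio_noacct(bio);
	}
}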
drivers/nvme/host/nvme.h
@@ -638,6 +638,21 @@ struct request *nvme_alloc_request(struct request_queue *q,
 		struct nvme_command *cmd, blk_mq_req_flags_t flags);
 void nvme_cleanup_cmd(struct request *req);
 blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req);
+blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
+		struct request *req);
+bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+		bool queue_live);
+static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+		bool queue_live)
+{
+	if (likely(ctrl->state == NVME_CTRL_LIVE))
+		return true;
+	if (ctrl->ops->flags & NVME_F_FABRICS &&
+	    ctrl->state == NVME_CTRL_DELETING)
+		return true;
+	return __nvme_check_ready(ctrl, rq, queue_live);
+}
 int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
 		void *buf, unsigned bufflen);
 int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
@@ -664,7 +679,6 @@ struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk,
 void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx);
 bool nvme_tryget_ns_head(struct nvme_ns_head *head);
 void nvme_put_ns_head(struct nvme_ns_head *head);
-struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys);
 int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
 		const struct file_operations *fops, struct module *owner);
 void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device);
drivers/nvme/host/pci.c
@@ -933,6 +933,9 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
 		return BLK_STS_IOERR;
 
+	if (!nvme_check_ready(&dev->ctrl, req, true))
+		return nvme_fail_nonready_command(&dev->ctrl, req);
+
 	ret = nvme_setup_cmd(ns, req);
 	if (ret)
 		return ret;
drivers/nvme/host/rdma.c
@@ -2050,8 +2050,8 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	WARN_ON_ONCE(rq->tag < 0);
 
-	if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
-		return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
+	if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
+		return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
 
 	dev = queue->device->dev;
drivers/nvme/host/tcp.c
@@ -2338,8 +2338,8 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
 	bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags);
 	blk_status_t ret;
 
-	if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
-		return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
+	if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
+		return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
 
 	ret = nvme_tcp_setup_cmd_pdu(ns, rq);
 	if (unlikely(ret))
drivers/nvme/target/admin-cmd.c
@@ -307,7 +307,7 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
 	case NVME_LOG_ANA:
 		return nvmet_execute_get_log_page_ana(req);
 	}
-	pr_err("unhandled lid %d on qid %d\n",
+	pr_debug("unhandled lid %d on qid %d\n",
 	       req->cmd->get_log_page.lid, req->sq->qid);
 	req->error_loc = offsetof(struct nvme_get_log_page_command, lid);
 	nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
@@ -659,7 +659,7 @@ static void nvmet_execute_identify(struct nvmet_req *req)
 		return nvmet_execute_identify_desclist(req);
 	}
-	pr_err("unhandled identify cns %d on qid %d\n",
+	pr_debug("unhandled identify cns %d on qid %d\n",
 	       req->cmd->identify.cns, req->sq->qid);
 	req->error_loc = offsetof(struct nvme_identify, cns);
 	nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
@@ -977,7 +977,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
 		return 0;
 	}
-	pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
+	pr_debug("unhandled cmd %d on qid %d\n", cmd->common.opcode,
 	       req->sq->qid);
 	req->error_loc = offsetof(struct nvme_common_command, opcode);
 	return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
drivers/nvme/target/loop.c
@@ -138,8 +138,8 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	bool queue_ready = test_bit(NVME_LOOP_Q_LIVE, &queue->flags);
 	blk_status_t ret;
 
-	if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready))
-		return nvmf_fail_nonready_command(&queue->ctrl->ctrl, req);
+	if (!nvme_check_ready(&queue->ctrl->ctrl, req, queue_ready))
+		return nvme_fail_nonready_command(&queue->ctrl->ctrl, req);
 
 	ret = nvme_setup_cmd(ns, req);
 	if (ret)