Commit 1521dc24 authored by Jens Axboe

Merge tag 'nvme-6.10-2024-05-29' of git://git.infradead.org/nvme into block-6.10

Pull NVMe fixes from Keith:

"nvme fixes for Linux 6.10

 - Removing unused fields (Kanchan)
 - Large folio offsets support (Kundan)
 - Multipath NUMA node initialization fix (Nilay)
 - Multipath IO stats accounting fixes (Keith)
 - Circular lockdep fix (Keith)
 - Target race condition fix (Sagi)
 - Target memory leak fix (Sagi)"

* tag 'nvme-6.10-2024-05-29' of git://git.infradead.org/nvme:
  nvmet: fix a possible leak when destroy a ctrl during qp establishment
  nvme: use srcu for iterating namespace list
  nvme: adjust multiples of NVME_CTRL_PAGE_SIZE in offset
  nvme: remove sgs and sws
  nvmet: fix ns enable/disable possible hang
  nvme-multipath: fix io accounting on failover
  nvme: fix multipath batched completion accounting
  nvme-multipath: find NUMA path only for online numa-node
parents 74d4ce92 c758b77d
@@ -414,7 +414,15 @@ static inline void nvme_end_req_zoned(struct request *req)
 	}
 }
 
-static inline void nvme_end_req(struct request *req)
+static inline void __nvme_end_req(struct request *req)
+{
+	nvme_end_req_zoned(req);
+	nvme_trace_bio_complete(req);
+	if (req->cmd_flags & REQ_NVME_MPATH)
+		nvme_mpath_end_request(req);
+}
+
+void nvme_end_req(struct request *req)
 {
 	blk_status_t status = nvme_error_status(nvme_req(req)->status);
 
@@ -424,10 +432,7 @@ static inline void nvme_end_req(struct request *req)
 		else
 			nvme_log_error(req);
 	}
-	nvme_end_req_zoned(req);
-	nvme_trace_bio_complete(req);
-	if (req->cmd_flags & REQ_NVME_MPATH)
-		nvme_mpath_end_request(req);
+	__nvme_end_req(req);
 	blk_mq_end_request(req, status);
 }
 
@@ -476,7 +481,7 @@ void nvme_complete_batch_req(struct request *req)
 {
 	trace_nvme_complete_rq(req);
 	nvme_cleanup_cmd(req);
-	nvme_end_req_zoned(req);
+	__nvme_end_req(req);
 }
 EXPORT_SYMBOL_GPL(nvme_complete_batch_req);
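Both completion entry points now share __nvme_end_req(): the per-request path (nvme_end_req()) and the batched path (nvme_complete_batch_req()) run the same tail work, where previously the batched path called nvme_end_req_zoned() directly and skipped the bio trace and multipath accounting. For context, the batched path is driven by a helper in nvme.h that looks roughly like this (a simplified sketch, not the verbatim source):

	static __always_inline void nvme_complete_batch(struct io_comp_batch *iob,
							void (*fn)(struct request *rq))
	{
		struct request *req;

		/* per-request cleanup; after this fix it includes mpath accounting */
		rq_list_for_each(&iob->req_list, req) {
			fn(req);
			nvme_complete_batch_req(req);
		}
		/* one batched end for the whole list */
		blk_mq_end_request_batch(iob);
	}

Requests completed this way never pass through nvme_end_req(), which is why the shared tail had to be factored out.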
@@ -673,7 +678,7 @@ static void nvme_free_ns(struct kref *kref)
 	kfree(ns);
 }
 
-static inline bool nvme_get_ns(struct nvme_ns *ns)
+bool nvme_get_ns(struct nvme_ns *ns)
 {
 	return kref_get_unless_zero(&ns->kref);
 }
@@ -3679,9 +3684,10 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
 struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 {
 	struct nvme_ns *ns, *ret = NULL;
+	int srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
 		if (ns->head->ns_id == nsid) {
 			if (!nvme_get_ns(ns))
 				continue;
@@ -3691,7 +3697,7 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 		if (ns->head->ns_id > nsid)
 			break;
 	}
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 	return ret;
 }
 EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU);
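Every reader-side hunk in the series has this shape: an SRCU read lock around an RCU-safe list walk. SRCU is used rather than plain RCU because these readers may sleep (several of the loops below block in blk-mq freeze/wait helpers). A condensed sketch of the read-side contract, with the per-namespace work reduced to an illustrative use(ns):

	int srcu_idx;

	srcu_idx = srcu_read_lock(&ctrl->srcu);	/* sleepable read-side critical section */
	list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
		use(ns);	/* entry cannot be freed until unlock + grace period */
	srcu_read_unlock(&ctrl->srcu, srcu_idx);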
@@ -3705,7 +3711,7 @@ static void nvme_ns_add_to_ctrl_list(struct nvme_ns *ns)
 
 	list_for_each_entry_reverse(tmp, &ns->ctrl->namespaces, list) {
 		if (tmp->head->ns_id < ns->head->ns_id) {
-			list_add(&ns->list, &tmp->list);
+			list_add_rcu(&ns->list, &tmp->list);
 			return;
 		}
 	}
@@ -3771,17 +3777,18 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
 	if (nvme_update_ns_info(ns, info))
 		goto out_unlink_ns;
 
-	down_write(&ctrl->namespaces_rwsem);
+	mutex_lock(&ctrl->namespaces_lock);
 	/*
 	 * Ensure that no namespaces are added to the ctrl list after the queues
 	 * are frozen, thereby avoiding a deadlock between scan and reset.
 	 */
 	if (test_bit(NVME_CTRL_FROZEN, &ctrl->flags)) {
-		up_write(&ctrl->namespaces_rwsem);
+		mutex_unlock(&ctrl->namespaces_lock);
 		goto out_unlink_ns;
 	}
 	nvme_ns_add_to_ctrl_list(ns);
-	up_write(&ctrl->namespaces_rwsem);
+	mutex_unlock(&ctrl->namespaces_lock);
+	synchronize_srcu(&ctrl->srcu);
 	nvme_get_ctrl(ctrl);
 
 	if (device_add_disk(ctrl->device, ns->disk, nvme_ns_attr_groups))
@@ -3804,9 +3811,10 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
 
  out_cleanup_ns_from_list:
 	nvme_put_ctrl(ctrl);
-	down_write(&ctrl->namespaces_rwsem);
-	list_del_init(&ns->list);
-	up_write(&ctrl->namespaces_rwsem);
+	mutex_lock(&ctrl->namespaces_lock);
+	list_del_rcu(&ns->list);
+	mutex_unlock(&ctrl->namespaces_lock);
+	synchronize_srcu(&ctrl->srcu);
  out_unlink_ns:
 	mutex_lock(&ctrl->subsys->lock);
 	list_del_rcu(&ns->siblings);
@@ -3856,9 +3864,10 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 	nvme_cdev_del(&ns->cdev, &ns->cdev_device);
 	del_gendisk(ns->disk);
 
-	down_write(&ns->ctrl->namespaces_rwsem);
-	list_del_init(&ns->list);
-	up_write(&ns->ctrl->namespaces_rwsem);
+	mutex_lock(&ns->ctrl->namespaces_lock);
+	list_del_rcu(&ns->list);
+	mutex_unlock(&ns->ctrl->namespaces_lock);
+	synchronize_srcu(&ns->ctrl->srcu);
 
 	if (last_path)
 		nvme_mpath_shutdown_disk(ns->head);
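The write side is the other half of the contract: mutations are serialized by namespaces_lock, list retraction uses the _rcu helper, and synchronize_srcu() drains every reader that might still see the removed entry before teardown continues. A sketch of the ordering, with free_ns() standing in for the real teardown:

	mutex_lock(&ctrl->namespaces_lock);
	list_del_rcu(&ns->list);		/* in-flight readers may still see ns */
	mutex_unlock(&ctrl->namespaces_lock);
	synchronize_srcu(&ctrl->srcu);		/* wait out all such readers */
	free_ns(ns);				/* hypothetical: now safe to tear down */

Swapping the last two steps would let an iterator in, say, nvme_sync_io_queues() dereference freed memory.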
@@ -3948,16 +3957,17 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 	struct nvme_ns *ns, *next;
 	LIST_HEAD(rm_list);
 
-	down_write(&ctrl->namespaces_rwsem);
+	mutex_lock(&ctrl->namespaces_lock);
 	list_for_each_entry_safe(ns, next, &ctrl->namespaces, list) {
 		if (ns->head->ns_id > nsid)
-			list_move_tail(&ns->list, &rm_list);
+			list_splice_init_rcu(&ns->list, &rm_list,
+					     synchronize_rcu);
 	}
-	up_write(&ctrl->namespaces_rwsem);
+	mutex_unlock(&ctrl->namespaces_lock);
+	synchronize_srcu(&ctrl->srcu);
 
 	list_for_each_entry_safe(ns, next, &rm_list, list)
 		nvme_ns_remove(ns);
 }
 
 static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
@@ -4127,9 +4137,10 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 	/* this is a no-op when called from the controller reset handler */
 	nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO);
 
-	down_write(&ctrl->namespaces_rwsem);
-	list_splice_init(&ctrl->namespaces, &ns_list);
-	up_write(&ctrl->namespaces_rwsem);
+	mutex_lock(&ctrl->namespaces_lock);
+	list_splice_init_rcu(&ctrl->namespaces, &ns_list, synchronize_rcu);
+	mutex_unlock(&ctrl->namespaces_lock);
+	synchronize_srcu(&ctrl->srcu);
 
 	list_for_each_entry_safe(ns, next, &ns_list, list)
 		nvme_ns_remove(ns);
@@ -4577,6 +4588,7 @@ static void nvme_free_ctrl(struct device *dev)
 	key_put(ctrl->tls_key);
 	nvme_free_cels(ctrl);
 	nvme_mpath_uninit(ctrl);
+	cleanup_srcu_struct(&ctrl->srcu);
 	nvme_auth_stop(ctrl);
 	nvme_auth_free(ctrl);
 	__free_page(ctrl->discard_page);
@@ -4609,10 +4621,15 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	ctrl->passthru_err_log_enabled = false;
 	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
 	spin_lock_init(&ctrl->lock);
+	mutex_init(&ctrl->namespaces_lock);
+
+	ret = init_srcu_struct(&ctrl->srcu);
+	if (ret)
+		return ret;
+
 	mutex_init(&ctrl->scan_lock);
 	INIT_LIST_HEAD(&ctrl->namespaces);
 	xa_init(&ctrl->cels);
-	init_rwsem(&ctrl->namespaces_rwsem);
 	ctrl->dev = dev;
 	ctrl->ops = ops;
 	ctrl->quirks = quirks;
@@ -4692,6 +4709,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 out:
 	if (ctrl->discard_page)
 		__free_page(ctrl->discard_page);
+	cleanup_srcu_struct(&ctrl->srcu);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nvme_init_ctrl);
@@ -4700,22 +4718,24 @@ EXPORT_SYMBOL_GPL(nvme_init_ctrl);
 void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	int srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
 		blk_mark_disk_dead(ns->disk);
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }
 EXPORT_SYMBOL_GPL(nvme_mark_namespaces_dead);
 
 void nvme_unfreeze(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	int srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
 		blk_mq_unfreeze_queue(ns->queue);
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 	clear_bit(NVME_CTRL_FROZEN, &ctrl->flags);
 }
 EXPORT_SYMBOL_GPL(nvme_unfreeze);
@@ -4723,14 +4743,15 @@ EXPORT_SYMBOL_GPL(nvme_unfreeze);
 int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
 {
 	struct nvme_ns *ns;
+	int srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
 		timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);
 		if (timeout <= 0)
 			break;
 	}
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 	return timeout;
 }
 EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
@@ -4738,23 +4759,25 @@ EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);
 void nvme_wait_freeze(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	int srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
 		blk_mq_freeze_queue_wait(ns->queue);
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }
 EXPORT_SYMBOL_GPL(nvme_wait_freeze);
 
 void nvme_start_freeze(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	int srcu_idx;
 
 	set_bit(NVME_CTRL_FROZEN, &ctrl->flags);
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
 		blk_freeze_queue_start(ns->queue);
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }
 EXPORT_SYMBOL_GPL(nvme_start_freeze);
@@ -4797,11 +4820,12 @@ EXPORT_SYMBOL_GPL(nvme_unquiesce_admin_queue);
 void nvme_sync_io_queues(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	int srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list)
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list)
 		blk_sync_queue(ns->queue);
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }
 EXPORT_SYMBOL_GPL(nvme_sync_io_queues);
......
@@ -789,15 +789,15 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
 		bool open_for_write)
 {
 	struct nvme_ns *ns;
-	int ret;
+	int ret, srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
 	if (list_empty(&ctrl->namespaces)) {
 		ret = -ENOTTY;
 		goto out_unlock;
 	}
 
-	ns = list_first_entry(&ctrl->namespaces, struct nvme_ns, list);
+	ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list);
 	if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
 		dev_warn(ctrl->device,
 			"NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
@@ -807,15 +807,18 @@ static int nvme_dev_user_cmd(struct nvme_ctrl *ctrl, void __user *argp,
 	dev_warn(ctrl->device,
 		"using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n");
 
-	kref_get(&ns->kref);
-	up_read(&ctrl->namespaces_rwsem);
+	if (!nvme_get_ns(ns)) {
+		ret = -ENXIO;
+		goto out_unlock;
+	}
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 
 	ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write);
 	nvme_put_ns(ns);
 	return ret;
 
 out_unlock:
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 	return ret;
 }
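The kref_get() -> nvme_get_ns() switch here is forced by the SRCU conversion: a reader can now observe a namespace whose last reference has already been dropped (only its free is deferred), so the lookup may take a reference only if the count is still non-zero. kref_get_unless_zero() is exactly that primitive; the idiom, sketched:

	/* under srcu_read_lock(): the memory is guaranteed valid,
	 * but the refcount may already have hit zero */
	if (!nvme_get_ns(ns)) {		/* kref_get_unless_zero(&ns->kref) */
		ret = -ENXIO;		/* dying namespace: treat as not found */
		goto out_unlock;
	}

Under the old rwsem, lookup and removal were mutually exclusive, so a namespace found on the list always held a valid reference and a plain kref_get() could not race with the final put.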
......
@@ -118,7 +118,8 @@ void nvme_failover_req(struct request *req)
 	blk_steal_bios(&ns->head->requeue_list, req);
 	spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
 
-	blk_mq_end_request(req, 0);
+	nvme_req(req)->status = 0;
+	nvme_end_req(req);
 	kblockd_schedule_work(&ns->head->requeue_work);
 }
@@ -150,16 +151,17 @@ void nvme_mpath_end_request(struct request *rq)
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	int srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
 		if (!ns->head->disk)
 			continue;
 		kblockd_schedule_work(&ns->head->requeue_work);
 		if (nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE)
 			disk_uevent(ns->head->disk, KOBJ_CHANGE);
 	}
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }
 
 static const char *nvme_ana_state_names[] = {
@@ -193,13 +195,14 @@ bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
 void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 {
 	struct nvme_ns *ns;
+	int srcu_idx;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
 		nvme_mpath_clear_current_path(ns);
 		kblockd_schedule_work(&ns->head->requeue_work);
 	}
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 }
 
 void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
@@ -595,7 +598,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
 		int node, srcu_idx;
 
 		srcu_idx = srcu_read_lock(&head->srcu);
-		for_each_node(node)
+		for_each_online_node(node)
 			__nvme_find_path(head, node);
 		srcu_read_unlock(&head->srcu, srcu_idx);
 	}
@@ -680,6 +683,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
 	u32 nr_nsids = le32_to_cpu(desc->nnsids), n = 0;
 	unsigned *nr_change_groups = data;
 	struct nvme_ns *ns;
+	int srcu_idx;
 
 	dev_dbg(ctrl->device, "ANA group %d: %s.\n",
 		le32_to_cpu(desc->grpid),
@@ -691,8 +695,8 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
 	if (!nr_nsids)
 		return 0;
 
-	down_read(&ctrl->namespaces_rwsem);
-	list_for_each_entry(ns, &ctrl->namespaces, list) {
+	srcu_idx = srcu_read_lock(&ctrl->srcu);
+	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) {
 		unsigned nsid;
 again:
 		nsid = le32_to_cpu(desc->nsids[n]);
@@ -705,7 +709,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
 		if (ns->head->ns_id > nsid)
 			goto again;
 	}
-	up_read(&ctrl->namespaces_rwsem);
+	srcu_read_unlock(&ctrl->srcu, srcu_idx);
 	return 0;
 }
......
@@ -282,7 +282,8 @@ struct nvme_ctrl {
 	struct blk_mq_tag_set *tagset;
 	struct blk_mq_tag_set *admin_tagset;
 	struct list_head namespaces;
-	struct rw_semaphore namespaces_rwsem;
+	struct mutex namespaces_lock;
+	struct srcu_struct srcu;
 	struct device ctrl_device;
 	struct device *device;	/* char device */
 #ifdef CONFIG_NVME_HWMON
@@ -471,8 +472,6 @@ struct nvme_ns_head {
 	u8 pi_type;
 	u8 pi_offset;
 	u8 guard_type;
-	u16 sgs;
-	u32 sws;
 #ifdef CONFIG_BLK_DEV_ZONED
 	u64 zsze;
 #endif
@@ -767,6 +766,7 @@ static inline bool nvme_state_terminal(struct nvme_ctrl *ctrl)
 	}
 }
 
+void nvme_end_req(struct request *req);
 void nvme_complete_rq(struct request *req);
 void nvme_complete_batch_req(struct request *req);
 
@@ -1161,6 +1161,7 @@ void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
 		struct nvme_command *cmd, int status);
 struct nvme_ctrl *nvme_ctrl_from_file(struct file *file);
 struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid);
+bool nvme_get_ns(struct nvme_ns *ns);
 void nvme_put_ns(struct nvme_ns *ns);
 
 static inline bool nvme_multi_css(struct nvme_ctrl *ctrl)
......
@@ -778,7 +778,8 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		struct bio_vec bv = req_bvec(req);
 
 		if (!is_pci_p2pdma_page(bv.bv_page)) {
-			if (bv.bv_offset + bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
+			if ((bv.bv_offset & (NVME_CTRL_PAGE_SIZE - 1)) +
+			    bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
 				return nvme_setup_prp_simple(dev, req,
 							     &cmnd->rw, &bv);
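The old fast-path check assumed bv_offset is a within-page offset. With large folios a bio_vec may begin many pages into the folio, so the raw offset can exceed two controller pages even when the data, once aligned down to a controller page, still fits the two-PRP fast path. Masking with NVME_CTRL_PAGE_SIZE - 1 recovers the in-page offset. A small standalone illustration of the arithmetic (the values are made up; NVME_CTRL_PAGE_SIZE is 4 KiB in the driver):

	#include <stdio.h>

	#define NVME_CTRL_PAGE_SIZE 4096u

	int main(void)
	{
		/* hypothetical bio_vec into a large folio: starts 64 KiB in, 8 KiB long */
		unsigned bv_offset = 65536, bv_len = 8192;

		unsigned old_check = bv_offset + bv_len;				/* 73728 */
		unsigned new_check = (bv_offset & (NVME_CTRL_PAGE_SIZE - 1)) + bv_len;	/* 8192 */

		printf("old check passes: %s\n",
		       old_check <= 2 * NVME_CTRL_PAGE_SIZE ? "yes" : "no");	/* no  */
		printf("new check passes: %s\n",
		       new_check <= 2 * NVME_CTRL_PAGE_SIZE ? "yes" : "no");	/* yes */
		return 0;
	}

With the unmasked check this I/O would needlessly take the slower scatter-gather setup; the masked check keeps it on nvme_setup_prp_simple().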
......
@@ -676,10 +676,18 @@ static ssize_t nvmet_ns_enable_store(struct config_item *item,
 	if (kstrtobool(page, &enable))
 		return -EINVAL;
 
+	/*
+	 * take a global nvmet_config_sem because the disable routine has a
+	 * window where it releases the subsys-lock, giving a chance to
+	 * a parallel enable to concurrently execute causing the disable to
+	 * have a misaccounting of the ns percpu_ref.
+	 */
+	down_write(&nvmet_config_sem);
 	if (enable)
 		ret = nvmet_ns_enable(ns);
 	else
 		nvmet_ns_disable(ns);
+	up_write(&nvmet_config_sem);
 
 	return ret ? ret : count;
 }
......
@@ -818,6 +818,15 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
 	percpu_ref_exit(&sq->ref);
 	nvmet_auth_sq_free(sq);
 
+	/*
+	 * we must reference the ctrl again after waiting for inflight IO
+	 * to complete. Because admin connect may have sneaked in after we
+	 * store sq->ctrl locally, but before we killed the percpu_ref. the
+	 * admin connect allocates and assigns sq->ctrl, which now needs a
+	 * final ref put, as this ctrl is going away.
+	 */
+	ctrl = sq->ctrl;
+
 	if (ctrl) {
 		/*
 		 * The teardown flow may take some time, and the host may not
......
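The nvmet_sq_destroy() hunk reads best as an ordering problem: the function samples sq->ctrl, kills and drains the queue's percpu_ref, and only then tears the controller down; but an admin connect can slip in after the sample and before the kill, assigning sq->ctrl a controller that still needs its final put. Hence the re-read after the drain. Schematically (helper names here are illustrative, not the target code's):

	ctrl = sq->ctrl;		/* may still be NULL at this point */
	kill_and_drain(&sq->ref);	/* admin connect may assign sq->ctrl
					 * right up until the kill takes effect */
	ctrl = sq->ctrl;		/* re-read: pick up a late assignment */
	if (ctrl)
		put_ctrl(ctrl);		/* final reference put for a dying ctrl */

Without the re-read, the controller allocated by the racing connect would leak.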