Commit 0d0b660f authored by Christoph Hellwig's avatar Christoph Hellwig

nvme: add ANA support

Add support for Asynchronous Namespace Access as specified in NVMe 1.3
TP 4004.  With ANA each namespace attached to a controller belongs to an
ANA group that describes the characteristics of accessing the namespaces
through this controller.  In the optimized and non-optimized states
namespaces can be accessed regularly, although in a multi-pathing
environment we should always prefer to access a namespace through a
controller where an optimized relationship exists.  Namespaces in
Inaccessible, Permanent-Loss or Change state for a given controller
should not be accessed.

The states are updated through reading the ANA log page, which is read
once during controller initialization, whenever the ANA change notice
AEN is received, or when one of the ANA specific status codes that
signal a state change is received on a command.

The ANA state is kept in the nvme_ns structure, which makes the checks in
the fast path very simple.  Updating the ANA state when reading the log
page is also very simple, the only downside is that finding the initial
ANA state when scanning for namespaces is a bit cumbersome.

The gendisk for a ns_head is only registered once a live path for it
exists.  Without that the kernel would hang during partition scanning.

Includes fixes and improvements from Hannes Reinecke.
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarKeith Busch <keith.busch@intel.com>
Reviewed-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: default avatarJohannes Thumshirn <jthumshirn@suse.de>
parent 8decf5d5
...@@ -1035,18 +1035,18 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) ...@@ -1035,18 +1035,18 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
EXPORT_SYMBOL_GPL(nvme_set_queue_count); EXPORT_SYMBOL_GPL(nvme_set_queue_count);
#define NVME_AEN_SUPPORTED \ #define NVME_AEN_SUPPORTED \
(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT) (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | NVME_AEN_CFG_ANA_CHANGE)
static void nvme_enable_aen(struct nvme_ctrl *ctrl) static void nvme_enable_aen(struct nvme_ctrl *ctrl)
{ {
u32 result; u32 supported = ctrl->oaes & NVME_AEN_SUPPORTED, result;
int status; int status;
status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT, status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT, supported, NULL,
ctrl->oaes & NVME_AEN_SUPPORTED, NULL, 0, &result); 0, &result);
if (status) if (status)
dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n", dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n",
ctrl->oaes & NVME_AEN_SUPPORTED); supported);
} }
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
...@@ -2370,6 +2370,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ...@@ -2370,6 +2370,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
nvme_set_queue_limits(ctrl, ctrl->admin_q); nvme_set_queue_limits(ctrl, ctrl->admin_q);
ctrl->sgls = le32_to_cpu(id->sgls); ctrl->sgls = le32_to_cpu(id->sgls);
ctrl->kas = le16_to_cpu(id->kas); ctrl->kas = le16_to_cpu(id->kas);
ctrl->max_namespaces = le32_to_cpu(id->mnan);
if (id->rtd3e) { if (id->rtd3e) {
/* us -> s */ /* us -> s */
...@@ -2429,8 +2430,12 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ...@@ -2429,8 +2430,12 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
ctrl->hmmaxd = le16_to_cpu(id->hmmaxd); ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
} }
ret = nvme_mpath_init(ctrl, id);
kfree(id); kfree(id);
if (ret < 0)
return ret;
if (ctrl->apst_enabled && !prev_apst_enabled) if (ctrl->apst_enabled && !prev_apst_enabled)
dev_pm_qos_expose_latency_tolerance(ctrl->device); dev_pm_qos_expose_latency_tolerance(ctrl->device);
else if (!ctrl->apst_enabled && prev_apst_enabled) else if (!ctrl->apst_enabled && prev_apst_enabled)
...@@ -2649,6 +2654,10 @@ static struct attribute *nvme_ns_id_attrs[] = { ...@@ -2649,6 +2654,10 @@ static struct attribute *nvme_ns_id_attrs[] = {
&dev_attr_nguid.attr, &dev_attr_nguid.attr,
&dev_attr_eui.attr, &dev_attr_eui.attr,
&dev_attr_nsid.attr, &dev_attr_nsid.attr,
#ifdef CONFIG_NVME_MULTIPATH
&dev_attr_ana_grpid.attr,
&dev_attr_ana_state.attr,
#endif
NULL, NULL,
}; };
...@@ -2671,6 +2680,14 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj, ...@@ -2671,6 +2680,14 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
return 0; return 0;
} }
#ifdef CONFIG_NVME_MULTIPATH
if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
if (dev_to_disk(dev)->fops != &nvme_fops) /* per-path attr */
return 0;
if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
return 0;
}
#endif
return a->mode; return a->mode;
} }
...@@ -3044,8 +3061,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ...@@ -3044,8 +3061,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
nvme_get_ctrl(ctrl); nvme_get_ctrl(ctrl);
kfree(id);
device_add_disk(ctrl->device, ns->disk); device_add_disk(ctrl->device, ns->disk);
if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj, if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
&nvme_ns_id_attr_group)) &nvme_ns_id_attr_group))
...@@ -3055,8 +3070,10 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ...@@ -3055,8 +3070,10 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
pr_warn("%s: failed to register lightnvm sysfs group for identification\n", pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
ns->disk->disk_name); ns->disk->disk_name);
nvme_mpath_add_disk(ns->head); nvme_mpath_add_disk(ns, id);
nvme_fault_inject_init(ns); nvme_fault_inject_init(ns);
kfree(id);
return; return;
out_unlink_ns: out_unlink_ns:
mutex_lock(&ctrl->subsys->lock); mutex_lock(&ctrl->subsys->lock);
...@@ -3364,6 +3381,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) ...@@ -3364,6 +3381,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
case NVME_AER_NOTICE_FW_ACT_STARTING: case NVME_AER_NOTICE_FW_ACT_STARTING:
queue_work(nvme_wq, &ctrl->fw_act_work); queue_work(nvme_wq, &ctrl->fw_act_work);
break; break;
#ifdef CONFIG_NVME_MULTIPATH
case NVME_AER_NOTICE_ANA:
if (!ctrl->ana_log_buf)
break;
queue_work(nvme_wq, &ctrl->ana_work);
break;
#endif
default: default:
dev_warn(ctrl->device, "async event result %08x\n", result); dev_warn(ctrl->device, "async event result %08x\n", result);
} }
...@@ -3396,6 +3420,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event); ...@@ -3396,6 +3420,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl) void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
{ {
nvme_mpath_stop(ctrl);
nvme_stop_keep_alive(ctrl); nvme_stop_keep_alive(ctrl);
flush_work(&ctrl->async_event_work); flush_work(&ctrl->async_event_work);
flush_work(&ctrl->scan_work); flush_work(&ctrl->scan_work);
...@@ -3433,6 +3458,7 @@ static void nvme_free_ctrl(struct device *dev) ...@@ -3433,6 +3458,7 @@ static void nvme_free_ctrl(struct device *dev)
ida_simple_remove(&nvme_instance_ida, ctrl->instance); ida_simple_remove(&nvme_instance_ida, ctrl->instance);
kfree(ctrl->effects); kfree(ctrl->effects);
nvme_mpath_uninit(ctrl);
if (subsys) { if (subsys) {
mutex_lock(&subsys->lock); mutex_lock(&subsys->lock);
......
This diff is collapsed.
...@@ -183,6 +183,7 @@ struct nvme_ctrl { ...@@ -183,6 +183,7 @@ struct nvme_ctrl {
u16 oacs; u16 oacs;
u16 nssa; u16 nssa;
u16 nr_streams; u16 nr_streams;
u32 max_namespaces;
atomic_t abort_limit; atomic_t abort_limit;
u8 vwc; u8 vwc;
u32 vs; u32 vs;
...@@ -205,6 +206,19 @@ struct nvme_ctrl { ...@@ -205,6 +206,19 @@ struct nvme_ctrl {
struct work_struct fw_act_work; struct work_struct fw_act_work;
unsigned long events; unsigned long events;
#ifdef CONFIG_NVME_MULTIPATH
/* asymmetric namespace access: */
u8 anacap;
u8 anatt;
u32 anagrpmax;
u32 nanagrpid;
struct mutex ana_lock;
struct nvme_ana_rsp_hdr *ana_log_buf;
size_t ana_log_size;
struct timer_list anatt_timer;
struct work_struct ana_work;
#endif
/* Power saving configuration */ /* Power saving configuration */
u64 ps_max_latency_us; u64 ps_max_latency_us;
bool apst_enabled; bool apst_enabled;
...@@ -269,6 +283,7 @@ struct nvme_ns_head { ...@@ -269,6 +283,7 @@ struct nvme_ns_head {
struct bio_list requeue_list; struct bio_list requeue_list;
spinlock_t requeue_lock; spinlock_t requeue_lock;
struct work_struct requeue_work; struct work_struct requeue_work;
struct mutex lock;
#endif #endif
struct list_head list; struct list_head list;
struct srcu_struct srcu; struct srcu_struct srcu;
...@@ -295,6 +310,10 @@ struct nvme_ns { ...@@ -295,6 +310,10 @@ struct nvme_ns {
struct nvme_ctrl *ctrl; struct nvme_ctrl *ctrl;
struct request_queue *queue; struct request_queue *queue;
struct gendisk *disk; struct gendisk *disk;
#ifdef CONFIG_NVME_MULTIPATH
enum nvme_ana_state ana_state;
u32 ana_grpid;
#endif
struct list_head siblings; struct list_head siblings;
struct nvm_dev *ndev; struct nvm_dev *ndev;
struct kref kref; struct kref kref;
...@@ -309,6 +328,7 @@ struct nvme_ns { ...@@ -309,6 +328,7 @@ struct nvme_ns {
unsigned long flags; unsigned long flags;
#define NVME_NS_REMOVING 0 #define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1 #define NVME_NS_DEAD 1
#define NVME_NS_ANA_PENDING 2
u16 noiob; u16 noiob;
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
...@@ -450,13 +470,17 @@ extern const struct attribute_group nvme_ns_id_attr_group; ...@@ -450,13 +470,17 @@ extern const struct attribute_group nvme_ns_id_attr_group;
extern const struct block_device_operations nvme_ns_head_ops; extern const struct block_device_operations nvme_ns_head_ops;
#ifdef CONFIG_NVME_MULTIPATH #ifdef CONFIG_NVME_MULTIPATH
bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl);
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
struct nvme_ctrl *ctrl, int *flags); struct nvme_ctrl *ctrl, int *flags);
void nvme_failover_req(struct request *req); void nvme_failover_req(struct request *req);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
void nvme_mpath_add_disk(struct nvme_ns_head *head); void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
void nvme_mpath_remove_disk(struct nvme_ns_head *head); void nvme_mpath_remove_disk(struct nvme_ns_head *head);
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
void nvme_mpath_stop(struct nvme_ctrl *ctrl);
static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{ {
...@@ -475,7 +499,14 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) ...@@ -475,7 +499,14 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
kblockd_schedule_work(&head->requeue_work); kblockd_schedule_work(&head->requeue_work);
} }
extern struct device_attribute dev_attr_ana_grpid;
extern struct device_attribute dev_attr_ana_state;
#else #else
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
{
return false;
}
/* /*
* Without the multipath code enabled, multiple controller per subsystems are * Without the multipath code enabled, multiple controller per subsystems are
* visible as devices and thus we cannot use the subsystem instance. * visible as devices and thus we cannot use the subsystem instance.
...@@ -497,7 +528,8 @@ static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, ...@@ -497,7 +528,8 @@ static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
{ {
return 0; return 0;
} }
static inline void nvme_mpath_add_disk(struct nvme_ns_head *head) static inline void nvme_mpath_add_disk(struct nvme_ns *ns,
struct nvme_id_ns *id)
{ {
} }
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head) static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
...@@ -509,6 +541,17 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) ...@@ -509,6 +541,17 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
{ {
} }
static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
struct nvme_id_ctrl *id)
{
return 0;
}
static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
{
}
static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl)
{
}
#endif /* CONFIG_NVME_MULTIPATH */ #endif /* CONFIG_NVME_MULTIPATH */
#ifdef CONFIG_NVM #ifdef CONFIG_NVM
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment