Commit f70a4792 authored by Jens Axboe's avatar Jens Axboe

Merge tag 'nvme-6.8-2023-12-21' of git://git.infradead.org/nvme into for-6.8/block

Pull NVMe updates from Keith:

"nvme updates for Linux 6.8

 - nvme fabrics spec updates (Guixin, Max)
 - nvme target updates (Guixin, Evan)
 - nvme attribute refactoring (Daniel)
 - nvme-fc numa fix (Keith)"

* tag 'nvme-6.8-2023-12-21' of git://git.infradead.org/nvme:
  nvme-fc: set numa_node after nvme_init_ctrl
  nvme-fabrics: don't check discovery ioccsz/iorcsz
  nvmet: configfs: use ctrl->instance to track passthru subsystems
  nvme: repack struct nvme_ns_head
  nvme: add csi, ms and nuse to sysfs
  nvme: rename ns attribute group
  nvme: refactor ns info setup function
  nvme: refactor ns info helpers
  nvme: move ns id info to struct nvme_ns_head
  nvmet: remove cntlid_min and cntlid_max check in nvmet_alloc_ctrl
  nvmet: allow identical cntlid_min and cntlid_max settings
  nvme-fabrics: check ioccsz and iorcsz
  nvme: introduce nvme_check_ctrl_fabric_info helper
parents 5165799f 5d51dc8d
This diff is collapsed.
......@@ -3509,10 +3509,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->ctrl.opts = opts;
ctrl->ctrl.nr_reconnects = 0;
if (lport->dev)
ctrl->ctrl.numa_node = dev_to_node(lport->dev);
else
ctrl->ctrl.numa_node = NUMA_NO_NODE;
INIT_LIST_HEAD(&ctrl->ctrl_list);
ctrl->lport = lport;
ctrl->rport = rport;
......@@ -3557,6 +3553,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0);
if (ret)
goto out_free_queues;
if (lport->dev)
ctrl->ctrl.numa_node = dev_to_node(lport->dev);
/* at this point, teardown path changes to ref counting on nvme ctrl */
......
......@@ -224,10 +224,10 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
return -EINVAL;
}
length = (io.nblocks + 1) << ns->lba_shift;
length = (io.nblocks + 1) << ns->head->lba_shift;
if ((io.control & NVME_RW_PRINFO_PRACT) &&
ns->ms == sizeof(struct t10_pi_tuple)) {
ns->head->ms == sizeof(struct t10_pi_tuple)) {
/*
* Protection information is stripped/inserted by the
* controller.
......@@ -237,11 +237,11 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
meta_len = 0;
metadata = NULL;
} else {
meta_len = (io.nblocks + 1) * ns->ms;
meta_len = (io.nblocks + 1) * ns->head->ms;
metadata = nvme_to_user_ptr(io.metadata);
}
if (ns->features & NVME_NS_EXT_LBAS) {
if (ns->head->features & NVME_NS_EXT_LBAS) {
length += meta_len;
meta_len = 0;
} else if (meta_len) {
......
......@@ -579,7 +579,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
*/
if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
rc = device_add_disk(&head->subsys->dev, head->disk,
nvme_ns_id_attr_groups);
nvme_ns_attr_groups);
if (rc) {
clear_bit(NVME_NSHEAD_DISK_LIVE, &ns->flags);
return;
......
......@@ -16,6 +16,7 @@
#include <linux/rcupdate.h>
#include <linux/wait.h>
#include <linux/t10-pi.h>
#include <linux/ratelimit_types.h>
#include <trace/events/block.h>
......@@ -439,13 +440,27 @@ struct nvme_ns_head {
struct list_head list;
struct srcu_struct srcu;
struct nvme_subsystem *subsys;
unsigned ns_id;
struct nvme_ns_ids ids;
struct list_head entry;
struct kref ref;
bool shared;
int instance;
struct nvme_effects_log *effects;
u64 nuse;
unsigned ns_id;
int lba_shift;
u16 ms;
u16 pi_size;
u8 pi_type;
u8 guard_type;
u16 sgs;
u32 sws;
#ifdef CONFIG_BLK_DEV_ZONED
u64 zsze;
#endif
unsigned long features;
struct ratelimit_state rs_nuse;
struct cdev cdev;
struct device cdev_device;
......@@ -487,17 +502,6 @@ struct nvme_ns {
struct kref kref;
struct nvme_ns_head *head;
int lba_shift;
u16 ms;
u16 pi_size;
u16 sgs;
u32 sws;
u8 pi_type;
u8 guard_type;
#ifdef CONFIG_BLK_DEV_ZONED
u64 zsze;
#endif
unsigned long features;
unsigned long flags;
#define NVME_NS_REMOVING 0
#define NVME_NS_ANA_PENDING 2
......@@ -512,9 +516,9 @@ struct nvme_ns {
};
/* NVMe ns supports metadata actions by the controller (generate/strip) */
static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
static inline bool nvme_ns_has_pi(struct nvme_ns_head *head)
{
return ns->pi_type && ns->ms == ns->pi_size;
return head->pi_type && head->ms == head->pi_size;
}
struct nvme_ctrl_ops {
......@@ -646,17 +650,17 @@ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
/*
* Convert a 512B sector number to a device logical block number.
*/
static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector)
static inline u64 nvme_sect_to_lba(struct nvme_ns_head *head, sector_t sector)
{
return sector >> (ns->lba_shift - SECTOR_SHIFT);
return sector >> (head->lba_shift - SECTOR_SHIFT);
}
/*
* Convert a device logical block number to a 512B sector number.
*/
static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba)
static inline sector_t nvme_lba_to_sect(struct nvme_ns_head *head, u64 lba)
{
return lba << (ns->lba_shift - SECTOR_SHIFT);
return lba << (head->lba_shift - SECTOR_SHIFT);
}
/*
......@@ -862,10 +866,12 @@ int nvme_ns_chr_uring_cmd(struct io_uring_cmd *ioucmd,
unsigned int issue_flags);
int nvme_ns_head_chr_uring_cmd(struct io_uring_cmd *ioucmd,
unsigned int issue_flags);
int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
struct nvme_id_ns **id);
int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo);
int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
extern const struct attribute_group *nvme_ns_id_attr_groups[];
extern const struct attribute_group *nvme_ns_attr_groups[];
extern const struct pr_ops nvme_pr_ops;
extern const struct block_device_operations nvme_ns_head_ops;
extern const struct attribute_group nvme_dev_attrs_group;
......
......@@ -1418,7 +1418,7 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
goto mr_put;
nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c,
req->mr->sig_attrs, ns->pi_type);
req->mr->sig_attrs, ns->head->pi_type);
nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);
ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
......@@ -2012,7 +2012,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
queue->pi_support &&
(c->common.opcode == nvme_cmd_write ||
c->common.opcode == nvme_cmd_read) &&
nvme_ns_has_pi(ns))
nvme_ns_has_pi(ns->head))
req->use_sig_mr = true;
else
req->use_sig_mr = false;
......
......@@ -114,12 +114,97 @@ static ssize_t nsid_show(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_RO(nsid);
static struct attribute *nvme_ns_id_attrs[] = {
static ssize_t csi_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
return sysfs_emit(buf, "%u\n", dev_to_ns_head(dev)->ids.csi);
}
static DEVICE_ATTR_RO(csi);
static ssize_t metadata_bytes_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sysfs_emit(buf, "%u\n", dev_to_ns_head(dev)->ms);
}
static DEVICE_ATTR_RO(metadata_bytes);
/*
 * Refresh head->nuse through whichever path nvme_find_path() selects.
 *
 * Returns 0 when the update was skipped by the rate limiter or when the
 * identify succeeded, -EWOULDBLOCK when no path was found, and otherwise
 * the non-zero result of nvme_identify_ns().
 */
static int ns_head_update_nuse(struct nvme_ns_head *head)
{
	struct nvme_id_ns *id;
	struct nvme_ns *path;
	int idx, err;

	/* Avoid issuing commands too often by rate limiting the update */
	if (!__ratelimit(&head->rs_nuse))
		return 0;

	/* The path lookup and the identify both run inside the SRCU section. */
	idx = srcu_read_lock(&head->srcu);
	path = nvme_find_path(head);
	if (path) {
		err = nvme_identify_ns(path->ctrl, head->ns_id, &id);
		if (!err) {
			head->nuse = le64_to_cpu(id->nuse);
			/* The identify buffer is released here only on success. */
			kfree(id);
		}
	} else {
		err = -EWOULDBLOCK;
	}
	srcu_read_unlock(&head->srcu, idx);

	return err;
}
/*
 * Refresh ns->head->nuse by issuing an identify through ns->ctrl.
 *
 * Returns 0 when the update was skipped by the rate limiter or when the
 * identify succeeded, otherwise the non-zero result of nvme_identify_ns().
 */
static int ns_update_nuse(struct nvme_ns *ns)
{
	struct nvme_id_ns *id;
	int ret;

	/* Avoid issuing commands too often by rate limiting the update. */
	if (!__ratelimit(&ns->head->rs_nuse))
		return 0;

	ret = nvme_identify_ns(ns->ctrl, ns->head->ns_id, &id);
	/*
	 * Do not touch 'id' on failure: ns_head_update_nuse() likewise skips
	 * the kfree() in that case, so the buffer is presumably released (or
	 * never handed out) by nvme_identify_ns() itself.  Freeing it here as
	 * well would be a double free or a free of an uninitialised pointer.
	 * NOTE(review): confirm against nvme_identify_ns().
	 */
	if (ret)
		return ret;

	ns->head->nuse = le64_to_cpu(id->nuse);
	kfree(id);

	return 0;
}
static ssize_t nuse_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct nvme_ns_head *head = dev_to_ns_head(dev);
struct gendisk *disk = dev_to_disk(dev);
struct block_device *bdev = disk->part0;
int ret;
if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
bdev->bd_disk->fops == &nvme_ns_head_ops)
ret = ns_head_update_nuse(head);
else
ret = ns_update_nuse(bdev->bd_disk->private_data);
if (ret)
return ret;
return sysfs_emit(buf, "%llu\n", head->nuse);
}
static DEVICE_ATTR_RO(nuse);
static struct attribute *nvme_ns_attrs[] = {
&dev_attr_wwid.attr,
&dev_attr_uuid.attr,
&dev_attr_nguid.attr,
&dev_attr_eui.attr,
&dev_attr_csi.attr,
&dev_attr_nsid.attr,
&dev_attr_metadata_bytes.attr,
&dev_attr_nuse.attr,
#ifdef CONFIG_NVME_MULTIPATH
&dev_attr_ana_grpid.attr,
&dev_attr_ana_state.attr,
......@@ -127,7 +212,7 @@ static struct attribute *nvme_ns_id_attrs[] = {
NULL,
};
static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
struct attribute *a, int n)
{
struct device *dev = container_of(kobj, struct device, kobj);
......@@ -157,13 +242,13 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
return a->mode;
}
static const struct attribute_group nvme_ns_id_attr_group = {
.attrs = nvme_ns_id_attrs,
.is_visible = nvme_ns_id_attrs_are_visible,
static const struct attribute_group nvme_ns_attr_group = {
.attrs = nvme_ns_attrs,
.is_visible = nvme_ns_attrs_are_visible,
};
const struct attribute_group *nvme_ns_id_attr_groups[] = {
&nvme_ns_id_attr_group,
const struct attribute_group *nvme_ns_attr_groups[] = {
&nvme_ns_attr_group,
NULL,
};
......
......@@ -11,7 +11,7 @@ int nvme_revalidate_zones(struct nvme_ns *ns)
{
struct request_queue *q = ns->queue;
blk_queue_chunk_sectors(q, ns->zsze);
blk_queue_chunk_sectors(q, ns->head->zsze);
blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
return blk_revalidate_disk_zones(ns->disk, NULL);
......@@ -99,11 +99,12 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
goto free_data;
}
ns->zsze = nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze));
if (!is_power_of_2(ns->zsze)) {
ns->head->zsze =
nvme_lba_to_sect(ns->head, le64_to_cpu(id->lbafe[lbaf].zsze));
if (!is_power_of_2(ns->head->zsze)) {
dev_warn(ns->ctrl->device,
"invalid zone size:%llu for namespace:%u\n",
ns->zsze, ns->head->ns_id);
ns->head->zsze, ns->head->ns_id);
status = -ENODEV;
goto free_data;
}
......@@ -128,7 +129,7 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
sizeof(struct nvme_zone_descriptor);
nr_zones = min_t(unsigned int, nr_zones,
get_capacity(ns->disk) >> ilog2(ns->zsze));
get_capacity(ns->disk) >> ilog2(ns->head->zsze));
bufsize = sizeof(struct nvme_zone_report) +
nr_zones * sizeof(struct nvme_zone_descriptor);
......@@ -147,7 +148,8 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
return NULL;
}
static int nvme_zone_parse_entry(struct nvme_ns *ns,
static int nvme_zone_parse_entry(struct nvme_ctrl *ctrl,
struct nvme_ns_head *head,
struct nvme_zone_descriptor *entry,
unsigned int idx, report_zones_cb cb,
void *data)
......@@ -155,20 +157,20 @@ static int nvme_zone_parse_entry(struct nvme_ns *ns,
struct blk_zone zone = { };
if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) {
dev_err(ns->ctrl->device, "invalid zone type %#x\n",
dev_err(ctrl->device, "invalid zone type %#x\n",
entry->zt);
return -EINVAL;
}
zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
zone.cond = entry->zs >> 4;
zone.len = ns->zsze;
zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap));
zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba));
zone.len = head->zsze;
zone.capacity = nvme_lba_to_sect(head, le64_to_cpu(entry->zcap));
zone.start = nvme_lba_to_sect(head, le64_to_cpu(entry->zslba));
if (zone.cond == BLK_ZONE_COND_FULL)
zone.wp = zone.start + zone.len;
else
zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp));
zone.wp = nvme_lba_to_sect(head, le64_to_cpu(entry->wp));
return cb(&zone, idx, data);
}
......@@ -196,11 +198,11 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;
sector &= ~(ns->zsze - 1);
sector &= ~(ns->head->zsze - 1);
while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
memset(report, 0, buflen);
c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector));
c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, sector));
ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
if (ret) {
if (ret > 0)
......@@ -213,14 +215,15 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
break;
for (i = 0; i < nz && zone_idx < nr_zones; i++) {
ret = nvme_zone_parse_entry(ns, &report->entries[i],
ret = nvme_zone_parse_entry(ns->ctrl, ns->head,
&report->entries[i],
zone_idx, cb, data);
if (ret)
goto out_free;
zone_idx++;
}
sector += ns->zsze * nz;
sector += ns->head->zsze * nz;
}
if (zone_idx > 0)
......@@ -239,7 +242,7 @@ blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
c->zms.opcode = nvme_cmd_zone_mgmt_send;
c->zms.nsid = cpu_to_le32(ns->head->ns_id);
c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, blk_rq_pos(req)));
c->zms.zsa = action;
if (req_op(req) == REQ_OP_ZONE_RESET_ALL)
......
......@@ -1274,7 +1274,7 @@ static ssize_t nvmet_subsys_attr_cntlid_min_store(struct config_item *item,
return -EINVAL;
down_write(&nvmet_config_sem);
if (cntlid_min >= to_subsys(item)->cntlid_max)
if (cntlid_min > to_subsys(item)->cntlid_max)
goto out_unlock;
to_subsys(item)->cntlid_min = cntlid_min;
up_write(&nvmet_config_sem);
......@@ -1304,7 +1304,7 @@ static ssize_t nvmet_subsys_attr_cntlid_max_store(struct config_item *item,
return -EINVAL;
down_write(&nvmet_config_sem);
if (cntlid_max <= to_subsys(item)->cntlid_min)
if (cntlid_max < to_subsys(item)->cntlid_min)
goto out_unlock;
to_subsys(item)->cntlid_max = cntlid_max;
up_write(&nvmet_config_sem);
......
......@@ -1425,9 +1425,6 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
if (!ctrl->sqs)
goto out_free_changed_ns_list;
if (subsys->cntlid_min > subsys->cntlid_max)
goto out_free_sqs;
ret = ida_alloc_range(&cntlid_ida,
subsys->cntlid_min, subsys->cntlid_max,
GFP_KERNEL);
......
......@@ -602,7 +602,7 @@ int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
goto out_put_file;
}
old = xa_cmpxchg(&passthru_subsystems, ctrl->cntlid, NULL,
old = xa_cmpxchg(&passthru_subsystems, ctrl->instance, NULL,
subsys, GFP_KERNEL);
if (xa_is_err(old)) {
ret = xa_err(old);
......@@ -635,7 +635,7 @@ int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
static void __nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys)
{
if (subsys->passthru_ctrl) {
xa_erase(&passthru_subsystems, subsys->passthru_ctrl->cntlid);
xa_erase(&passthru_subsystems, subsys->passthru_ctrl->instance);
module_put(subsys->passthru_ctrl->ops->module);
nvme_put_ctrl(subsys->passthru_ctrl);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment