Commit 1ed4211d authored by Jens Axboe

Merge tag 'nvme-5.10-2020-09-27' of git://git.infradead.org/nvme into for-5.10/drivers

Pull NVMe updates from Christoph:

"nvme updates for 5.10

 - fix keep alive timer modification (Amit Engel)
 - order the PCI ID list more sensibly (Andy Shevchenko)
 - cleanup the open by controller helper (Chaitanya Kulkarni)
 - use an xarray for the CSE log lookup (Chaitanya Kulkarni)
 - support ZNS in nvmet passthrough mode (Chaitanya Kulkarni)
 - fix nvme_ns_report_zones (me)
 - add a sanity check to nvmet-fc (James Smart)
 - fix interrupt allocation when too many polled queues are specified
   (Jeffle Xu)
 - small nvmet-tcp optimization (Mark Wunderlich)"

* tag 'nvme-5.10-2020-09-27' of git://git.infradead.org/nvme:
  nvme-pci: allocate separate interrupt for the reserved non-polled I/O queue
  nvme: fix error handling in nvme_ns_report_zones
  nvmet-fc: fix missing check for no hostport struct
  nvmet: add passthru ZNS support
  nvmet: handle keep-alive timer when kato is modified by a set features cmd
  nvmet-tcp: have queue io_work context run on sock incoming cpu
  nvme-pci: Move enumeration by class to be last in the table
  nvme: use an xarray to lookup the Commands Supported and Effects log
  nvme: lift the file open code from nvme_ctrl_get_by_path
parents 163090c1 21cc2f3f
......@@ -3025,26 +3025,10 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi,
return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
}
static struct nvme_cel *nvme_find_cel(struct nvme_ctrl *ctrl, u8 csi)
{
struct nvme_cel *cel, *ret = NULL;
spin_lock_irq(&ctrl->lock);
list_for_each_entry(cel, &ctrl->cels, entry) {
if (cel->csi == csi) {
ret = cel;
break;
}
}
spin_unlock_irq(&ctrl->lock);
return ret;
}
static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
struct nvme_effects_log **log)
{
struct nvme_cel *cel = nvme_find_cel(ctrl, csi);
struct nvme_cel *cel = xa_load(&ctrl->cels, csi);
int ret;
if (cel)
......@@ -3062,10 +3046,7 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
}
cel->csi = csi;
spin_lock_irq(&ctrl->lock);
list_add_tail(&cel->entry, &ctrl->cels);
spin_unlock_irq(&ctrl->lock);
xa_store(&ctrl->cels, cel->csi, cel, GFP_KERNEL);
out:
*log = &cel->log;
return 0;
......@@ -4448,15 +4429,11 @@ static void nvme_free_ctrl(struct device *dev)
struct nvme_ctrl *ctrl =
container_of(dev, struct nvme_ctrl, ctrl_device);
struct nvme_subsystem *subsys = ctrl->subsys;
struct nvme_cel *cel, *next;
if (!subsys || ctrl->instance != subsys->instance)
ida_simple_remove(&nvme_instance_ida, ctrl->instance);
list_for_each_entry_safe(cel, next, &ctrl->cels, entry) {
list_del(&cel->entry);
kfree(cel);
}
xa_destroy(&ctrl->cels);
nvme_mpath_uninit(ctrl);
__free_page(ctrl->discard_page);
......@@ -4488,7 +4465,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
spin_lock_init(&ctrl->lock);
mutex_init(&ctrl->scan_lock);
INIT_LIST_HEAD(&ctrl->namespaces);
INIT_LIST_HEAD(&ctrl->cels);
xa_init(&ctrl->cels);
init_rwsem(&ctrl->namespaces_rwsem);
ctrl->dev = dev;
ctrl->ops = ops;
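Not part of the diff: a minimal sketch of the xarray pattern this series adopts for the per-CSI Commands Supported and Effects log cache, replacing the spinlock-protected list removed above. The demo_ names are illustrative, not the driver's.

#include <linux/xarray.h>
#include <linux/slab.h>

struct demo_cel {				/* stand-in for struct nvme_cel */
	u8 csi;					/* Command Set Identifier, the lookup key */
};

static DEFINE_XARRAY(demo_cels);		/* replaces list_head + spinlock */

static struct demo_cel *demo_get_cel(u8 csi)
{
	struct demo_cel *cel = xa_load(&demo_cels, csi);	/* lockless lookup by key */

	if (cel)
		return cel;

	cel = kzalloc(sizeof(*cel), GFP_KERNEL);
	if (!cel)
		return NULL;
	cel->csi = csi;
	/* xa_store() serializes internally; no external lock is required */
	xa_store(&demo_cels, csi, cel, GFP_KERNEL);
	return cel;
}

Note that xa_destroy() only tears down the index itself; entries stored in the array are not freed by it.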
......@@ -4668,28 +4645,13 @@ void nvme_sync_queues(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_sync_queues);
struct nvme_ctrl *nvme_ctrl_get_by_path(const char *path)
struct nvme_ctrl *nvme_ctrl_from_file(struct file *file)
{
struct nvme_ctrl *ctrl;
struct file *f;
f = filp_open(path, O_RDWR, 0);
if (IS_ERR(f))
return ERR_CAST(f);
if (f->f_op != &nvme_dev_fops) {
ctrl = ERR_PTR(-EINVAL);
goto out_close;
}
ctrl = f->private_data;
nvme_get_ctrl(ctrl);
out_close:
filp_close(f, NULL);
return ctrl;
if (file->f_op != &nvme_dev_fops)
return NULL;
return file->private_data;
}
EXPORT_SYMBOL_NS_GPL(nvme_ctrl_get_by_path, NVME_TARGET_PASSTHRU);
EXPORT_SYMBOL_NS_GPL(nvme_ctrl_from_file, NVME_TARGET_PASSTHRU);
/*
* Check we didn't inadvertently grow the command structure sizes:
......
......@@ -300,7 +300,7 @@ struct nvme_ctrl {
unsigned long quirks;
struct nvme_id_power_state psd[32];
struct nvme_effects_log *effects;
struct list_head cels;
struct xarray cels;
struct work_struct scan_work;
struct work_struct async_event_work;
struct delayed_work ka_work;
......@@ -822,7 +822,7 @@ static inline void nvme_hwmon_init(struct nvme_ctrl *ctrl) { }
u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
u8 opcode);
void nvme_execute_passthru_rq(struct request *rq);
struct nvme_ctrl *nvme_ctrl_get_by_path(const char *path);
struct nvme_ctrl *nvme_ctrl_from_file(struct file *file);
struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid);
void nvme_put_ns(struct nvme_ns *ns);
......
......@@ -2038,32 +2038,30 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
.calc_sets = nvme_calc_irq_sets,
.priv = dev,
};
unsigned int irq_queues, this_p_queues;
unsigned int irq_queues, poll_queues;
/*
* Poll queues don't need interrupts, but we need at least one IO
* queue left over for non-polled IO.
* Poll queues don't need interrupts, but we need at least one I/O queue
* left over for non-polled I/O.
*/
this_p_queues = dev->nr_poll_queues;
if (this_p_queues >= nr_io_queues) {
this_p_queues = nr_io_queues - 1;
irq_queues = 1;
} else {
irq_queues = nr_io_queues - this_p_queues + 1;
}
dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
poll_queues = min(dev->nr_poll_queues, nr_io_queues - 1);
dev->io_queues[HCTX_TYPE_POLL] = poll_queues;
/* Initialize for the single interrupt case */
/*
* Initialize for the single interrupt case, will be updated in
* nvme_calc_irq_sets().
*/
dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
dev->io_queues[HCTX_TYPE_READ] = 0;
/*
* Some Apple controllers require all queues to use the
* first vector.
* We need interrupts for the admin queue and each non-polled I/O queue,
* but some Apple controllers require all queues to use the first
* vector.
*/
if (dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR)
irq_queues = 1;
irq_queues = 1;
if (!(dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR))
irq_queues += (nr_io_queues - poll_queues);
return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues,
PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd);
}
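Not part of the diff: the new vector accounting in isolation, as a hedged sketch (demo_irq_queues is not a driver function). For example, nr_io_queues = 4 with nr_poll_queues = 8 now yields poll_queues = 3 and irq_queues = 2, so the one reserved non-polled I/O queue gets its own interrupt instead of sharing the single vector with the admin queue as before; with NVME_QUIRK_SINGLE_VECTOR the result stays 1.

#include <linux/minmax.h>
#include <linux/types.h>

static unsigned int demo_irq_queues(unsigned int nr_io_queues,
		unsigned int nr_poll_queues, bool single_vector_quirk)
{
	/* always leave at least one I/O queue for non-polled I/O */
	unsigned int poll_queues = min(nr_poll_queues, nr_io_queues - 1);
	/* one vector is reserved for the admin queue */
	unsigned int irq_queues = 1;

	if (!single_vector_quirk)
		irq_queues += nr_io_queues - poll_queues;
	return irq_queues;
}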
......@@ -3187,7 +3185,6 @@ static const struct pci_device_id nvme_id_table[] = {
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001),
.driver_data = NVME_QUIRK_SINGLE_VECTOR },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
......@@ -3195,6 +3192,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_SINGLE_VECTOR |
NVME_QUIRK_128_BYTES_SQES |
NVME_QUIRK_SHARED_TAGS },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, nvme_id_table);
......
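Not part of the diff: a hedged illustration of why the catch-all class entry moves to the end of the table (demo_id_table is made up). pci_match_id() scans the driver's table in order and returns the first matching entry, whose driver_data supplies the quirks, so specific vendor/device entries belong ahead of the broad class match.

static const struct pci_device_id demo_id_table[] = {
	{ PCI_DEVICE(0x106b, 0x2001),		/* specific entry: its quirk is honoured */
	  .driver_data = NVME_QUIRK_SINGLE_VECTOR, },
	/* the broad class match goes last so it only catches devices
	 * that no earlier, more specific entry claimed */
	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
	{ 0, }
};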
......@@ -133,28 +133,6 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
return NULL;
}
static int __nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
struct nvme_zone_report *report,
size_t buflen)
{
struct nvme_command c = { };
int ret;
c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector));
c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
c.zmr.zra = NVME_ZRA_ZONE_REPORT;
c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;
ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
if (ret)
return ret;
return le64_to_cpu(report->nr_zones);
}
static int nvme_zone_parse_entry(struct nvme_ns *ns,
struct nvme_zone_descriptor *entry,
unsigned int idx, report_zones_cb cb,
......@@ -182,6 +160,7 @@ static int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data)
{
struct nvme_zone_report *report;
struct nvme_command c = { };
int ret, zone_idx = 0;
unsigned int nz, i;
size_t buflen;
......@@ -190,14 +169,26 @@ static int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
if (!report)
return -ENOMEM;
c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
c.zmr.zra = NVME_ZRA_ZONE_REPORT;
c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;
sector &= ~(ns->zsze - 1);
while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
memset(report, 0, buflen);
ret = __nvme_ns_report_zones(ns, sector, report, buflen);
if (ret < 0)
c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector));
ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
if (ret) {
if (ret > 0)
ret = -EIO;
goto out_free;
}
nz = min_t(unsigned int, ret, nr_zones);
nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones);
if (!nz)
break;
......
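Not part of the diff: the status-mapping convention the error-handling fix applies, as a small hedged helper (demo_map_nvme_status is illustrative). nvme_submit_sync_cmd() returns 0 on success, a negative errno on submission or transport failure, or a positive NVMe status code; report_zones callers expect only 0 or a negative errno, so positive status values are folded into -EIO.

#include <linux/errno.h>

static int demo_map_nvme_status(int ret)
{
	if (ret > 0)		/* positive: NVMe completion status code */
		return -EIO;	/* fold into a generic block-layer error */
	return ret;		/* 0 or -errno is passed through unchanged */
}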
......@@ -727,7 +727,9 @@ u16 nvmet_set_feat_kato(struct nvmet_req *req)
{
u32 val32 = le32_to_cpu(req->cmd->common.cdw11);
nvmet_stop_keep_alive_timer(req->sq->ctrl);
req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000);
nvmet_start_keep_alive_timer(req->sq->ctrl);
nvmet_set_result(req, req->sq->ctrl->kato);
......
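Not part of the diff: the Set Features (KATO) handling above in miniature, with the units spelled out (demo_update_kato is illustrative). CDW11 carries the keep-alive timeout in milliseconds, the target keeps ctrl->kato in seconds, and the timer is stopped before the value changes and re-armed afterwards so the pending work never fires with the stale interval.

static void demo_update_kato(struct nvmet_ctrl *ctrl, u32 cdw11_ms)
{
	nvmet_stop_keep_alive_timer(ctrl);		/* cancel work armed with the old kato */
	ctrl->kato = DIV_ROUND_UP(cdw11_ms, 1000);	/* milliseconds -> seconds, rounded up */
	nvmet_start_keep_alive_timer(ctrl);		/* re-arm with the new interval */
}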
......@@ -395,7 +395,7 @@ static void nvmet_keep_alive_timer(struct work_struct *work)
nvmet_ctrl_fatal_error(ctrl);
}
static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
if (unlikely(ctrl->kato == 0))
return;
......@@ -407,7 +407,7 @@ static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}
static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
if (unlikely(ctrl->kato == 0))
return;
......
......@@ -1019,7 +1019,7 @@ static void
nvmet_fc_free_hostport(struct nvmet_fc_hostport *hostport)
{
/* if LLDD not implemented, leave as NULL */
if (!hostport->hosthandle)
if (!hostport || !hostport->hosthandle)
return;
nvmet_fc_hostport_put(hostport);
......
......@@ -395,6 +395,8 @@ void nvmet_get_feat_async_event(struct nvmet_req *req);
u16 nvmet_set_feat_kato(struct nvmet_req *req);
u16 nvmet_set_feat_async_event(struct nvmet_req *req, u32 mask);
void nvmet_execute_async_event(struct nvmet_req *req);
void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl);
void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl);
u16 nvmet_parse_connect_cmd(struct nvmet_req *req);
void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id);
......
......@@ -456,10 +456,26 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
req->execute = nvmet_passthru_execute_cmd;
req->p.use_workqueue = true;
return NVME_SC_SUCCESS;
case NVME_ID_CNS_CS_CTRL:
switch (req->cmd->identify.csi) {
case NVME_CSI_ZNS:
req->execute = nvmet_passthru_execute_cmd;
req->p.use_workqueue = true;
return NVME_SC_SUCCESS;
}
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
case NVME_ID_CNS_NS:
req->execute = nvmet_passthru_execute_cmd;
req->p.use_workqueue = true;
return NVME_SC_SUCCESS;
case NVME_ID_CNS_CS_NS:
switch (req->cmd->identify.csi) {
case NVME_CSI_ZNS:
req->execute = nvmet_passthru_execute_cmd;
req->p.use_workqueue = true;
return NVME_SC_SUCCESS;
}
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
default:
return nvmet_setup_passthru_command(req);
}
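Not part of the diff: the Identify variants the passthru parser now lets through for ZNS, shown as a hedged example (the literal nsid is arbitrary). CNS 05h and 06h are the I/O-command-set-specific namespace and controller data structures, and CSI 02h selects the Zoned Namespace command set; any other CSI for these CNS values is still rejected with Invalid Opcode.

struct nvme_identify demo_id_cs_ns = {
	.opcode	= nvme_admin_identify,
	.nsid	= cpu_to_le32(1),		/* arbitrary example namespace */
	.cns	= NVME_ID_CNS_CS_NS,		/* 05h: CS-specific Identify Namespace */
	.csi	= NVME_CSI_ZNS,			/* 02h: Zoned Namespace command set */
};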
......@@ -474,6 +490,7 @@ u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req)
int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
{
struct nvme_ctrl *ctrl;
struct file *file;
int ret = -EINVAL;
void *old;
......@@ -488,24 +505,29 @@ int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
goto out_unlock;
}
ctrl = nvme_ctrl_get_by_path(subsys->passthru_ctrl_path);
if (IS_ERR(ctrl)) {
ret = PTR_ERR(ctrl);
file = filp_open(subsys->passthru_ctrl_path, O_RDWR, 0);
if (IS_ERR(file)) {
ret = PTR_ERR(file);
goto out_unlock;
}
ctrl = nvme_ctrl_from_file(file);
if (!ctrl) {
pr_err("failed to open nvme controller %s\n",
subsys->passthru_ctrl_path);
goto out_unlock;
goto out_put_file;
}
old = xa_cmpxchg(&passthru_subsystems, ctrl->cntlid, NULL,
subsys, GFP_KERNEL);
if (xa_is_err(old)) {
ret = xa_err(old);
goto out_put_ctrl;
goto out_put_file;
}
if (old)
goto out_put_ctrl;
goto out_put_file;
subsys->passthru_ctrl = ctrl;
subsys->ver = ctrl->vs;
......@@ -516,13 +538,12 @@ int nvmet_passthru_ctrl_enable(struct nvmet_subsys *subsys)
NVME_TERTIARY(subsys->ver));
subsys->ver = NVME_VS(1, 2, 1);
}
nvme_get_ctrl(ctrl);
__module_get(subsys->passthru_ctrl->ops->module);
mutex_unlock(&subsys->lock);
return 0;
ret = 0;
out_put_ctrl:
nvme_put_ctrl(ctrl);
out_put_file:
filp_close(file, NULL);
out_unlock:
mutex_unlock(&subsys->lock);
return ret;
......
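Not part of the diff: the new open pattern in isolation, as a hedged sketch (demo_ctrl_from_path is not a kernel function). The struct file is only a vehicle for resolving the controller: nvme_ctrl_from_file() returns NULL unless the file really is an nvme char device, the caller pins the controller with nvme_get_ctrl(), and the file is closed before returning, so no file reference is held for the lifetime of the passthru subsystem.

static struct nvme_ctrl *demo_ctrl_from_path(const char *path)
{
	struct nvme_ctrl *ctrl;
	struct file *file;

	file = filp_open(path, O_RDWR, 0);
	if (IS_ERR(file))
		return ERR_CAST(file);

	ctrl = nvme_ctrl_from_file(file);	/* NULL if not an nvme char device */
	if (ctrl)
		nvme_get_ctrl(ctrl);		/* keep the controller, not the file */

	filp_close(file, NULL);			/* the file reference is no longer needed */
	return ctrl ? ctrl : ERR_PTR(-EINVAL);
}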
......@@ -94,7 +94,6 @@ struct nvmet_tcp_queue {
struct socket *sock;
struct nvmet_tcp_port *port;
struct work_struct io_work;
int cpu;
struct nvmet_cq nvme_cq;
struct nvmet_sq nvme_sq;
......@@ -144,7 +143,6 @@ struct nvmet_tcp_port {
struct work_struct accept_work;
struct nvmet_port *nport;
struct sockaddr_storage addr;
int last_cpu;
void (*data_ready)(struct sock *);
};
......@@ -219,6 +217,11 @@ static inline void nvmet_tcp_put_cmd(struct nvmet_tcp_cmd *cmd)
list_add_tail(&cmd->entry, &cmd->queue->free_list);
}
static inline int queue_cpu(struct nvmet_tcp_queue *queue)
{
return queue->sock->sk->sk_incoming_cpu;
}
static inline u8 nvmet_tcp_hdgst_len(struct nvmet_tcp_queue *queue)
{
return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0;
......@@ -506,7 +509,7 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req)
struct nvmet_tcp_queue *queue = cmd->queue;
llist_add(&cmd->lentry, &queue->resp_list);
queue_work_on(cmd->queue->cpu, nvmet_tcp_wq, &cmd->queue->io_work);
queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &cmd->queue->io_work);
}
static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
......@@ -1223,7 +1226,7 @@ static void nvmet_tcp_io_work(struct work_struct *w)
* We exahusted our budget, requeue our selves
*/
if (pending)
queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
}
static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue,
......@@ -1383,7 +1386,7 @@ static void nvmet_tcp_data_ready(struct sock *sk)
read_lock_bh(&sk->sk_callback_lock);
queue = sk->sk_user_data;
if (likely(queue))
queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
read_unlock_bh(&sk->sk_callback_lock);
}
......@@ -1403,7 +1406,7 @@ static void nvmet_tcp_write_space(struct sock *sk)
if (sk_stream_is_writeable(sk)) {
clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
}
out:
read_unlock_bh(&sk->sk_callback_lock);
......@@ -1512,9 +1515,6 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
if (ret)
goto out_free_connect;
port->last_cpu = cpumask_next_wrap(port->last_cpu,
cpu_online_mask, -1, false);
queue->cpu = port->last_cpu;
nvmet_prepare_receive_pdu(queue);
mutex_lock(&nvmet_tcp_queue_mutex);
......@@ -1525,7 +1525,7 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
if (ret)
goto out_destroy_sq;
queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
return 0;
out_destroy_sq:
......@@ -1612,7 +1612,6 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
}
port->nport = nport;
port->last_cpu = -1;
INIT_WORK(&port->accept_work, nvmet_tcp_accept_work);
if (port->nport->inline_data_size < 0)
port->nport->inline_data_size = NVMET_TCP_DEF_INLINE_DATA_SIZE;
......
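Not part of the diff: the affinity idea behind queue_cpu() as a hedged sketch (demo_kick_io_work is illustrative). Rather than binding each queue to a round-robin CPU chosen at allocation time, io_work is queued on whatever CPU the network stack last recorded as handling this socket's incoming packets, which keeps the work close to where the data landed.

static void demo_kick_io_work(struct nvmet_tcp_queue *queue)
{
	/* sk_incoming_cpu tracks the CPU that last processed RX for this socket */
	int cpu = READ_ONCE(queue->sock->sk->sk_incoming_cpu);

	/* run io_work where the data arrived so buffers stay cache-hot */
	queue_work_on(cpu, nvmet_tcp_wq, &queue->io_work);
}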