Commit 01bb12fc authored by Jens Axboe

Merge branch 'nvme-5.7-rc1' of git://git.infradead.org/nvme into for-5.7/drivers

Pull 5.7 NVMe updates from Keith.

* 'nvme-5.7-rc1' of git://git.infradead.org/nvme: (42 commits)
  nvme: cleanup namespace identifier reporting in nvme_init_ns_head
  nvme: rename __nvme_find_ns_head to nvme_find_ns_head
  nvme: refactor nvme_identify_ns_descs error handling
  nvme-tcp: Add warning on state change failure at nvme_tcp_setup_ctrl
  nvme-rdma: Add warning on state change failure at nvme_rdma_setup_ctrl
  nvme: Fix controller creation races with teardown flow
  nvme: Make nvme_uninit_ctrl symmetric to nvme_init_ctrl
  nvme: Fix ctrl use-after-free during sysfs deletion
  nvme-pci: Re-order nvme_pci_free_ctrl
  nvme: Remove unused return code from nvme_delete_ctrl_sync
  nvme: Use nvme_state_terminal helper
  nvme: release ida resources
  nvme: Add compat_ioctl handler for NVME_IOCTL_SUBMIT_IO
  nvmet-tcp: optimize tcp stack TX when data digest is used
  nvme-fabrics: Use scnprintf() for avoiding potential buffer overflow
  nvme-multipath: do not reset on unknown status
  nvmet-rdma: allocate RW ctxs according to mdts
  nvmet-rdma: Implement get_mdts controller op
  nvmet: Add get_mdts op for controllers
  nvme-pci: properly print controller address
  ...
parents 5ae3a2c0 43fcd9e1
...@@ -32,8 +32,6 @@ config NVME_HWMON ...@@ -32,8 +32,6 @@ config NVME_HWMON
a hardware monitoring device will be created for each NVMe drive a hardware monitoring device will be created for each NVMe drive
in the system. in the system.
If unsure, say N.
config NVME_FABRICS config NVME_FABRICS
tristate tristate
......
This diff is collapsed.
...@@ -105,14 +105,14 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size) ...@@ -105,14 +105,14 @@ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
int len = 0; int len = 0;
if (ctrl->opts->mask & NVMF_OPT_TRADDR) if (ctrl->opts->mask & NVMF_OPT_TRADDR)
len += snprintf(buf, size, "traddr=%s", ctrl->opts->traddr); len += scnprintf(buf, size, "traddr=%s", ctrl->opts->traddr);
if (ctrl->opts->mask & NVMF_OPT_TRSVCID) if (ctrl->opts->mask & NVMF_OPT_TRSVCID)
len += snprintf(buf + len, size - len, "%strsvcid=%s", len += scnprintf(buf + len, size - len, "%strsvcid=%s",
(len) ? "," : "", ctrl->opts->trsvcid); (len) ? "," : "", ctrl->opts->trsvcid);
if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR) if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)
len += snprintf(buf + len, size - len, "%shost_traddr=%s", len += scnprintf(buf + len, size - len, "%shost_traddr=%s",
(len) ? "," : "", ctrl->opts->host_traddr); (len) ? "," : "", ctrl->opts->host_traddr);
len += snprintf(buf + len, size - len, "\n"); len += scnprintf(buf + len, size - len, "\n");
return len; return len;
} }
......
...@@ -3181,10 +3181,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ...@@ -3181,10 +3181,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
goto fail_ctrl; goto fail_ctrl;
} }
nvme_get_ctrl(&ctrl->ctrl);
if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) { if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
nvme_put_ctrl(&ctrl->ctrl);
dev_err(ctrl->ctrl.device, dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: failed to schedule initial connect\n", "NVME-FC{%d}: failed to schedule initial connect\n",
ctrl->cnum); ctrl->cnum);
......
...@@ -64,17 +64,12 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, ...@@ -64,17 +64,12 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
} }
} }
void nvme_failover_req(struct request *req) bool nvme_failover_req(struct request *req)
{ {
struct nvme_ns *ns = req->q->queuedata; struct nvme_ns *ns = req->q->queuedata;
u16 status = nvme_req(req)->status; u16 status = nvme_req(req)->status;
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&ns->head->requeue_lock, flags);
blk_steal_bios(&ns->head->requeue_list, req);
spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
blk_mq_end_request(req, 0);
switch (status & 0x7ff) { switch (status & 0x7ff) {
case NVME_SC_ANA_TRANSITION: case NVME_SC_ANA_TRANSITION:
case NVME_SC_ANA_INACCESSIBLE: case NVME_SC_ANA_INACCESSIBLE:
...@@ -103,15 +98,17 @@ void nvme_failover_req(struct request *req) ...@@ -103,15 +98,17 @@ void nvme_failover_req(struct request *req)
nvme_mpath_clear_current_path(ns); nvme_mpath_clear_current_path(ns);
break; break;
default: default:
/* /* This was a non-ANA error so follow the normal error path. */
* Reset the controller for any non-ANA error as we don't know return false;
* what caused the error.
*/
nvme_reset_ctrl(ns->ctrl);
break;
} }
spin_lock_irqsave(&ns->head->requeue_lock, flags);
blk_steal_bios(&ns->head->requeue_list, req);
spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
blk_mq_end_request(req, 0);
kblockd_schedule_work(&ns->head->requeue_work); kblockd_schedule_work(&ns->head->requeue_work);
return true;
} }
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
......
...@@ -259,6 +259,7 @@ struct nvme_ctrl { ...@@ -259,6 +259,7 @@ struct nvme_ctrl {
struct nvme_command ka_cmd; struct nvme_command ka_cmd;
struct work_struct fw_act_work; struct work_struct fw_act_work;
unsigned long events; unsigned long events;
bool created;
#ifdef CONFIG_NVME_MULTIPATH #ifdef CONFIG_NVME_MULTIPATH
/* asymmetric namespace access: */ /* asymmetric namespace access: */
...@@ -550,7 +551,7 @@ void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys); ...@@ -550,7 +551,7 @@ void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
void nvme_mpath_start_freeze(struct nvme_subsystem *subsys); void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
struct nvme_ctrl *ctrl, int *flags); struct nvme_ctrl *ctrl, int *flags);
void nvme_failover_req(struct request *req); bool nvme_failover_req(struct request *req);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
...@@ -599,8 +600,9 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, ...@@ -599,8 +600,9 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
} }
static inline void nvme_failover_req(struct request *req) static inline bool nvme_failover_req(struct request *req)
{ {
return false;
} }
static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{ {
......
...@@ -971,39 +971,25 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) ...@@ -971,39 +971,25 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
nvme_end_request(req, cqe->status, cqe->result); nvme_end_request(req, cqe->status, cqe->result);
} }
/*
 * Handle every completion queue entry in the half-open ring range
 * [start, end), wrapping back to slot 0 once the index reaches
 * nvmeq->q_depth.
 */
static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
{
	u16 idx = start;

	while (idx != end) {
		nvme_handle_cqe(nvmeq, idx);
		idx++;
		if (idx == nvmeq->q_depth)
			idx = 0;
	}
}
static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
{ {
if (nvmeq->cq_head == nvmeq->q_depth - 1) { if (++nvmeq->cq_head == nvmeq->q_depth) {
nvmeq->cq_head = 0; nvmeq->cq_head = 0;
nvmeq->cq_phase = !nvmeq->cq_phase; nvmeq->cq_phase ^= 1;
} else {
nvmeq->cq_head++;
} }
} }
static inline int nvme_process_cq(struct nvme_queue *nvmeq, u16 *start, static inline int nvme_process_cq(struct nvme_queue *nvmeq)
u16 *end, unsigned int tag)
{ {
int found = 0; int found = 0;
*start = nvmeq->cq_head;
while (nvme_cqe_pending(nvmeq)) { while (nvme_cqe_pending(nvmeq)) {
if (tag == -1U || nvmeq->cqes[nvmeq->cq_head].command_id == tag) found++;
found++; nvme_handle_cqe(nvmeq, nvmeq->cq_head);
nvme_update_cq_head(nvmeq); nvme_update_cq_head(nvmeq);
} }
*end = nvmeq->cq_head;
if (*start != *end) if (found)
nvme_ring_cq_doorbell(nvmeq); nvme_ring_cq_doorbell(nvmeq);
return found; return found;
} }
...@@ -1012,21 +998,16 @@ static irqreturn_t nvme_irq(int irq, void *data) ...@@ -1012,21 +998,16 @@ static irqreturn_t nvme_irq(int irq, void *data)
{ {
struct nvme_queue *nvmeq = data; struct nvme_queue *nvmeq = data;
irqreturn_t ret = IRQ_NONE; irqreturn_t ret = IRQ_NONE;
u16 start, end;
/* /*
* The rmb/wmb pair ensures we see all updates from a previous run of * The rmb/wmb pair ensures we see all updates from a previous run of
* the irq handler, even if that was on another CPU. * the irq handler, even if that was on another CPU.
*/ */
rmb(); rmb();
nvme_process_cq(nvmeq, &start, &end, -1); if (nvme_process_cq(nvmeq))
ret = IRQ_HANDLED;
wmb(); wmb();
if (start != end) {
nvme_complete_cqes(nvmeq, start, end);
return IRQ_HANDLED;
}
return ret; return ret;
} }
...@@ -1039,46 +1020,30 @@ static irqreturn_t nvme_irq_check(int irq, void *data) ...@@ -1039,46 +1020,30 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
} }
/* /*
* Poll for completions any queue, including those not dedicated to polling. * Poll for completions for any interrupt driven queue
* Can be called from any context. * Can be called from any context.
*/ */
static int nvme_poll_irqdisable(struct nvme_queue *nvmeq, unsigned int tag) static void nvme_poll_irqdisable(struct nvme_queue *nvmeq)
{ {
struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev); struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev);
u16 start, end;
int found;
/* WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags));
* For a poll queue we need to protect against the polling thread
* using the CQ lock. For normal interrupt driven threads we have
* to disable the interrupt to avoid racing with it.
*/
if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) {
spin_lock(&nvmeq->cq_poll_lock);
found = nvme_process_cq(nvmeq, &start, &end, tag);
spin_unlock(&nvmeq->cq_poll_lock);
} else {
disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
found = nvme_process_cq(nvmeq, &start, &end, tag);
enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
}
nvme_complete_cqes(nvmeq, start, end); disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
return found; nvme_process_cq(nvmeq);
enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
} }
static int nvme_poll(struct blk_mq_hw_ctx *hctx) static int nvme_poll(struct blk_mq_hw_ctx *hctx)
{ {
struct nvme_queue *nvmeq = hctx->driver_data; struct nvme_queue *nvmeq = hctx->driver_data;
u16 start, end;
bool found; bool found;
if (!nvme_cqe_pending(nvmeq)) if (!nvme_cqe_pending(nvmeq))
return 0; return 0;
spin_lock(&nvmeq->cq_poll_lock); spin_lock(&nvmeq->cq_poll_lock);
found = nvme_process_cq(nvmeq, &start, &end, -1); found = nvme_process_cq(nvmeq);
nvme_complete_cqes(nvmeq, start, end);
spin_unlock(&nvmeq->cq_poll_lock); spin_unlock(&nvmeq->cq_poll_lock);
return found; return found;
...@@ -1255,7 +1220,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) ...@@ -1255,7 +1220,12 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
/* /*
* Did we miss an interrupt? * Did we miss an interrupt?
*/ */
if (nvme_poll_irqdisable(nvmeq, req->tag)) { if (test_bit(NVMEQ_POLLED, &nvmeq->flags))
nvme_poll(req->mq_hctx);
else
nvme_poll_irqdisable(nvmeq);
if (blk_mq_request_completed(req)) {
dev_warn(dev->ctrl.device, dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, completion polled\n", "I/O %d QID %d timeout, completion polled\n",
req->tag, nvmeq->qid); req->tag, nvmeq->qid);
...@@ -1398,7 +1368,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) ...@@ -1398,7 +1368,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
else else
nvme_disable_ctrl(&dev->ctrl); nvme_disable_ctrl(&dev->ctrl);
nvme_poll_irqdisable(nvmeq, -1); nvme_poll_irqdisable(nvmeq);
} }
/* /*
...@@ -1409,13 +1379,10 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) ...@@ -1409,13 +1379,10 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
*/ */
static void nvme_reap_pending_cqes(struct nvme_dev *dev) static void nvme_reap_pending_cqes(struct nvme_dev *dev)
{ {
u16 start, end;
int i; int i;
for (i = dev->ctrl.queue_count - 1; i > 0; i--) { for (i = dev->ctrl.queue_count - 1; i > 0; i--)
nvme_process_cq(&dev->queues[i], &start, &end, -1); nvme_process_cq(&dev->queues[i]);
nvme_complete_cqes(&dev->queues[i], start, end);
}
} }
static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
...@@ -2503,13 +2470,13 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) ...@@ -2503,13 +2470,13 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
struct nvme_dev *dev = to_nvme_dev(ctrl); struct nvme_dev *dev = to_nvme_dev(ctrl);
nvme_dbbuf_dma_free(dev); nvme_dbbuf_dma_free(dev);
put_device(dev->dev);
nvme_free_tagset(dev); nvme_free_tagset(dev);
if (dev->ctrl.admin_q) if (dev->ctrl.admin_q)
blk_put_queue(dev->ctrl.admin_q); blk_put_queue(dev->ctrl.admin_q);
kfree(dev->queues);
free_opal_dev(dev->ctrl.opal_dev); free_opal_dev(dev->ctrl.opal_dev);
mempool_destroy(dev->iod_mempool); mempool_destroy(dev->iod_mempool);
put_device(dev->dev);
kfree(dev->queues);
kfree(dev); kfree(dev);
} }
...@@ -2689,7 +2656,7 @@ static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size) ...@@ -2689,7 +2656,7 @@ static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
{ {
struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev); struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev);
return snprintf(buf, size, "%s", dev_name(&pdev->dev)); return snprintf(buf, size, "%s\n", dev_name(&pdev->dev));
} }
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
...@@ -2835,7 +2802,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) ...@@ -2835,7 +2802,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
nvme_reset_ctrl(&dev->ctrl); nvme_reset_ctrl(&dev->ctrl);
nvme_get_ctrl(&dev->ctrl);
async_schedule(nvme_async_probe, dev); async_schedule(nvme_async_probe, dev);
return 0; return 0;
...@@ -2907,10 +2873,9 @@ static void nvme_remove(struct pci_dev *pdev) ...@@ -2907,10 +2873,9 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_free_host_mem(dev); nvme_free_host_mem(dev);
nvme_dev_remove_admin(dev); nvme_dev_remove_admin(dev);
nvme_free_queues(dev, 0); nvme_free_queues(dev, 0);
nvme_uninit_ctrl(&dev->ctrl);
nvme_release_prp_pools(dev); nvme_release_prp_pools(dev);
nvme_dev_unmap(dev); nvme_dev_unmap(dev);
nvme_put_ctrl(&dev->ctrl); nvme_uninit_ctrl(&dev->ctrl);
} }
#ifdef CONFIG_PM_SLEEP #ifdef CONFIG_PM_SLEEP
......
...@@ -1022,8 +1022,13 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) ...@@ -1022,8 +1022,13 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
if (!changed) { if (!changed) {
/* state change failure is ok if we're in DELETING state */ /*
* state change failure is ok if we're in DELETING state,
* unless we're during creation of a new controller to
* avoid races with teardown flow.
*/
WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING); WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
WARN_ON_ONCE(new);
ret = -EINVAL; ret = -EINVAL;
goto destroy_io; goto destroy_io;
} }
...@@ -2043,8 +2048,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, ...@@ -2043,8 +2048,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n", dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
ctrl->ctrl.opts->subsysnqn, &ctrl->addr); ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
nvme_get_ctrl(&ctrl->ctrl);
mutex_lock(&nvme_rdma_ctrl_mutex); mutex_lock(&nvme_rdma_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list); list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
mutex_unlock(&nvme_rdma_ctrl_mutex); mutex_unlock(&nvme_rdma_ctrl_mutex);
......
...@@ -20,6 +20,16 @@ ...@@ -20,6 +20,16 @@
struct nvme_tcp_queue; struct nvme_tcp_queue;
/*
 * Define the socket priority to use for connections where it is desirable
 * that the NIC consider performing optimized packet processing or filtering.
 * A non-zero value is sufficient to indicate general consideration of any
 * possible optimization. Making it a module param allows for alternative
 * values that may be unique for some NIC implementations.
 *
 * The value is applied to a queue socket via SO_PRIORITY only when it is
 * greater than zero.
 */
static int so_priority;
module_param(so_priority, int, 0644);
MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
enum nvme_tcp_send_state { enum nvme_tcp_send_state {
NVME_TCP_SEND_CMD_PDU = 0, NVME_TCP_SEND_CMD_PDU = 0,
NVME_TCP_SEND_H2C_PDU, NVME_TCP_SEND_H2C_PDU,
...@@ -1017,8 +1027,15 @@ static int nvme_tcp_try_send(struct nvme_tcp_queue *queue) ...@@ -1017,8 +1027,15 @@ static int nvme_tcp_try_send(struct nvme_tcp_queue *queue)
if (req->state == NVME_TCP_SEND_DDGST) if (req->state == NVME_TCP_SEND_DDGST)
ret = nvme_tcp_try_send_ddgst(req); ret = nvme_tcp_try_send_ddgst(req);
done: done:
if (ret == -EAGAIN) if (ret == -EAGAIN) {
ret = 0; ret = 0;
} else if (ret < 0) {
dev_err(queue->ctrl->ctrl.device,
"failed to send request %d\n", ret);
if (ret != -EPIPE && ret != -ECONNRESET)
nvme_tcp_fail_request(queue->request);
nvme_tcp_done_send_req(queue);
}
return ret; return ret;
} }
...@@ -1049,25 +1066,16 @@ static void nvme_tcp_io_work(struct work_struct *w) ...@@ -1049,25 +1066,16 @@ static void nvme_tcp_io_work(struct work_struct *w)
int result; int result;
result = nvme_tcp_try_send(queue); result = nvme_tcp_try_send(queue);
if (result > 0) { if (result > 0)
pending = true; pending = true;
} else if (unlikely(result < 0)) { else if (unlikely(result < 0))
dev_err(queue->ctrl->ctrl.device, break;
"failed to send request %d\n", result);
/*
* Fail the request unless peer closed the connection,
* in which case error recovery flow will complete all.
*/
if ((result != -EPIPE) && (result != -ECONNRESET))
nvme_tcp_fail_request(queue->request);
nvme_tcp_done_send_req(queue);
return;
}
result = nvme_tcp_try_recv(queue); result = nvme_tcp_try_recv(queue);
if (result > 0) if (result > 0)
pending = true; pending = true;
else if (unlikely(result < 0))
break;
if (!pending) if (!pending)
return; return;
...@@ -1248,13 +1256,67 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) ...@@ -1248,13 +1256,67 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
return ret; return ret;
} }
/* Return true when @queue has queue id 0, i.e. it is the admin queue. */
static bool nvme_tcp_admin_queue(struct nvme_tcp_queue *queue)
{
	return !nvme_tcp_queue_id(queue);
}
/*
 * Return true when @queue is not the admin queue and its id is below
 * 1 + io_queues[HCTX_TYPE_DEFAULT], i.e. it belongs to the default set.
 */
static bool nvme_tcp_default_queue(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
	int qid = nvme_tcp_queue_id(queue);

	if (nvme_tcp_admin_queue(queue))
		return false;

	return qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT];
}
/*
 * Return true when @queue is neither the admin queue nor a default queue
 * and its id is below the end of the read set, which follows the default
 * set in queue id order.
 */
static bool nvme_tcp_read_queue(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
	int qid = nvme_tcp_queue_id(queue);

	if (nvme_tcp_admin_queue(queue) || nvme_tcp_default_queue(queue))
		return false;

	return qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
			 ctrl->io_queues[HCTX_TYPE_READ];
}
/*
 * Return true when @queue is not an admin, default or read queue and its
 * id is below the end of the poll set, which follows the default and read
 * sets in queue id order.
 */
static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
	int qid = nvme_tcp_queue_id(queue);

	if (nvme_tcp_admin_queue(queue) ||
	    nvme_tcp_default_queue(queue) ||
	    nvme_tcp_read_queue(queue))
		return false;

	return qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
			 ctrl->io_queues[HCTX_TYPE_READ] +
			 ctrl->io_queues[HCTX_TYPE_POLL];
}
/*
 * Choose queue->io_cpu for @queue.
 *
 * The queue id is first converted to a zero-based index within the set
 * the queue belongs to (default, read or poll); a queue matching none of
 * those sets (e.g. the admin queue) uses index 0. The index is then mapped
 * onto cpu_online_mask with cpumask_next_wrap().
 */
static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
{
	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
	int qid = nvme_tcp_queue_id(queue);
	int n;

	if (nvme_tcp_default_queue(queue)) {
		n = qid - 1;
	} else if (nvme_tcp_read_queue(queue)) {
		n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1;
	} else if (nvme_tcp_poll_queue(queue)) {
		n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] -
				ctrl->io_queues[HCTX_TYPE_READ] - 1;
	} else {
		n = 0;
	}

	queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
}
static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
int qid, size_t queue_size) int qid, size_t queue_size)
{ {
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvme_tcp_queue *queue = &ctrl->queues[qid]; struct nvme_tcp_queue *queue = &ctrl->queues[qid];
struct linger sol = { .l_onoff = 1, .l_linger = 0 }; struct linger sol = { .l_onoff = 1, .l_linger = 0 };
int ret, opt, rcv_pdu_size, n; int ret, opt, rcv_pdu_size;
queue->ctrl = ctrl; queue->ctrl = ctrl;
INIT_LIST_HEAD(&queue->send_list); INIT_LIST_HEAD(&queue->send_list);
...@@ -1309,6 +1371,17 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, ...@@ -1309,6 +1371,17 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
goto err_sock; goto err_sock;
} }
if (so_priority > 0) {
ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_PRIORITY,
(char *)&so_priority, sizeof(so_priority));
if (ret) {
dev_err(ctrl->ctrl.device,
"failed to set SO_PRIORITY sock opt, ret %d\n",
ret);
goto err_sock;
}
}
/* Set socket type of service */ /* Set socket type of service */
if (nctrl->opts->tos >= 0) { if (nctrl->opts->tos >= 0) {
opt = nctrl->opts->tos; opt = nctrl->opts->tos;
...@@ -1322,11 +1395,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, ...@@ -1322,11 +1395,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
} }
queue->sock->sk->sk_allocation = GFP_ATOMIC; queue->sock->sk->sk_allocation = GFP_ATOMIC;
if (!qid) nvme_tcp_set_queue_io_cpu(queue);
n = 0;
else
n = (qid - 1) % num_online_cpus();
queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
queue->request = NULL; queue->request = NULL;
queue->data_remaining = 0; queue->data_remaining = 0;
queue->ddgst_remaining = 0; queue->ddgst_remaining = 0;
...@@ -1861,8 +1930,13 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) ...@@ -1861,8 +1930,13 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
} }
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) {
/* state change failure is ok if we're in DELETING state */ /*
* state change failure is ok if we're in DELETING state,
* unless we're during creation of a new controller to
* avoid races with teardown flow.
*/
WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING); WARN_ON_ONCE(ctrl->state != NVME_CTRL_DELETING);
WARN_ON_ONCE(new);
ret = -EINVAL; ret = -EINVAL;
goto destroy_io; goto destroy_io;
} }
...@@ -2359,8 +2433,6 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, ...@@ -2359,8 +2433,6 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n", dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
ctrl->ctrl.opts->subsysnqn, &ctrl->addr); ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
nvme_get_ctrl(&ctrl->ctrl);
mutex_lock(&nvme_tcp_ctrl_mutex); mutex_lock(&nvme_tcp_ctrl_mutex);
list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list); list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
mutex_unlock(&nvme_tcp_ctrl_mutex); mutex_unlock(&nvme_tcp_ctrl_mutex);
......
...@@ -322,12 +322,25 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req) ...@@ -322,12 +322,25 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR); nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
} }
/*
 * Fill id->mn with the model number configured on @subsys, or with
 * NVMET_DEFAULT_CTRL_MODEL when no model has been set. The string is
 * copied with memcpy_and_pad() using ' ' as the pad byte.
 *
 * subsys->model is read under rcu_read_lock(), and the copy is done
 * before the read-side section ends.
 */
static void nvmet_id_set_model_number(struct nvme_id_ctrl *id,
		struct nvmet_subsys *subsys)
{
	struct nvmet_subsys_model *configured;
	const char *mn;

	rcu_read_lock();
	configured = rcu_dereference(subsys->model);
	mn = configured ? configured->number : NVMET_DEFAULT_CTRL_MODEL;
	memcpy_and_pad(id->mn, sizeof(id->mn), mn, strlen(mn), ' ');
	rcu_read_unlock();
}
static void nvmet_execute_identify_ctrl(struct nvmet_req *req) static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{ {
struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id; struct nvme_id_ctrl *id;
u16 status = 0; u16 status = 0;
const char model[] = "Linux";
id = kzalloc(sizeof(*id), GFP_KERNEL); id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id) { if (!id) {
...@@ -342,7 +355,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) ...@@ -342,7 +355,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
memset(id->sn, ' ', sizeof(id->sn)); memset(id->sn, ' ', sizeof(id->sn));
bin2hex(id->sn, &ctrl->subsys->serial, bin2hex(id->sn, &ctrl->subsys->serial,
min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2)); min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' '); nvmet_id_set_model_number(id, ctrl->subsys);
memcpy_and_pad(id->fr, sizeof(id->fr), memcpy_and_pad(id->fr, sizeof(id->fr),
UTS_RELEASE, strlen(UTS_RELEASE), ' '); UTS_RELEASE, strlen(UTS_RELEASE), ' ');
...@@ -356,8 +369,12 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) ...@@ -356,8 +369,12 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
/* we support multiple ports, multiples hosts and ANA: */ /* we support multiple ports, multiples hosts and ANA: */
id->cmic = (1 << 0) | (1 << 1) | (1 << 3); id->cmic = (1 << 0) | (1 << 1) | (1 << 3);
/* no limit on data transfer sizes for now */ /* Limit MDTS according to transport capability */
id->mdts = 0; if (ctrl->ops->get_mdts)
id->mdts = ctrl->ops->get_mdts(ctrl);
else
id->mdts = 0;
id->cntlid = cpu_to_le16(ctrl->cntlid); id->cntlid = cpu_to_le16(ctrl->cntlid);
id->ver = cpu_to_le32(ctrl->subsys->ver); id->ver = cpu_to_le32(ctrl->subsys->ver);
...@@ -720,13 +737,22 @@ static void nvmet_execute_set_features(struct nvmet_req *req) ...@@ -720,13 +737,22 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
{ {
struct nvmet_subsys *subsys = req->sq->ctrl->subsys; struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u32 cdw11 = le32_to_cpu(req->cmd->common.cdw11);
u16 status = 0; u16 status = 0;
u16 nsqr;
u16 ncqr;
if (!nvmet_check_data_len(req, 0)) if (!nvmet_check_data_len(req, 0))
return; return;
switch (cdw10 & 0xff) { switch (cdw10 & 0xff) {
case NVME_FEAT_NUM_QUEUES: case NVME_FEAT_NUM_QUEUES:
ncqr = (cdw11 >> 16) & 0xffff;
nsqr = cdw11 & 0xffff;
if (ncqr == 0xffff || nsqr == 0xffff) {
status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
break;
}
nvmet_set_result(req, nvmet_set_result(req,
(subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16)); (subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16));
break; break;
......
...@@ -395,14 +395,12 @@ static ssize_t nvmet_ns_device_uuid_store(struct config_item *item, ...@@ -395,14 +395,12 @@ static ssize_t nvmet_ns_device_uuid_store(struct config_item *item,
struct nvmet_subsys *subsys = ns->subsys; struct nvmet_subsys *subsys = ns->subsys;
int ret = 0; int ret = 0;
mutex_lock(&subsys->lock); mutex_lock(&subsys->lock);
if (ns->enabled) { if (ns->enabled) {
ret = -EBUSY; ret = -EBUSY;
goto out_unlock; goto out_unlock;
} }
if (uuid_parse(page, &ns->uuid)) if (uuid_parse(page, &ns->uuid))
ret = -EINVAL; ret = -EINVAL;
...@@ -815,10 +813,10 @@ static ssize_t nvmet_subsys_attr_version_show(struct config_item *item, ...@@ -815,10 +813,10 @@ static ssize_t nvmet_subsys_attr_version_show(struct config_item *item,
(int)NVME_MAJOR(subsys->ver), (int)NVME_MAJOR(subsys->ver),
(int)NVME_MINOR(subsys->ver), (int)NVME_MINOR(subsys->ver),
(int)NVME_TERTIARY(subsys->ver)); (int)NVME_TERTIARY(subsys->ver));
else
return snprintf(page, PAGE_SIZE, "%d.%d\n", return snprintf(page, PAGE_SIZE, "%d.%d\n",
(int)NVME_MAJOR(subsys->ver), (int)NVME_MAJOR(subsys->ver),
(int)NVME_MINOR(subsys->ver)); (int)NVME_MINOR(subsys->ver));
} }
static ssize_t nvmet_subsys_attr_version_store(struct config_item *item, static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,
...@@ -828,7 +826,6 @@ static ssize_t nvmet_subsys_attr_version_store(struct config_item *item, ...@@ -828,7 +826,6 @@ static ssize_t nvmet_subsys_attr_version_store(struct config_item *item,
int major, minor, tertiary = 0; int major, minor, tertiary = 0;
int ret; int ret;
ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary); ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary);
if (ret != 2 && ret != 3) if (ret != 2 && ret != 3)
return -EINVAL; return -EINVAL;
...@@ -852,20 +849,151 @@ static ssize_t nvmet_subsys_attr_serial_show(struct config_item *item, ...@@ -852,20 +849,151 @@ static ssize_t nvmet_subsys_attr_serial_show(struct config_item *item,
static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item, static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item,
const char *page, size_t count) const char *page, size_t count)
{ {
struct nvmet_subsys *subsys = to_subsys(item); u64 serial;
if (sscanf(page, "%llx\n", &serial) != 1)
return -EINVAL;
down_write(&nvmet_config_sem); down_write(&nvmet_config_sem);
sscanf(page, "%llx\n", &subsys->serial); to_subsys(item)->serial = serial;
up_write(&nvmet_config_sem); up_write(&nvmet_config_sem);
return count; return count;
} }
CONFIGFS_ATTR(nvmet_subsys_, attr_serial); CONFIGFS_ATTR(nvmet_subsys_, attr_serial);
static ssize_t nvmet_subsys_attr_cntlid_min_show(struct config_item *item,
char *page)
{
return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->cntlid_min);
}
static ssize_t nvmet_subsys_attr_cntlid_min_store(struct config_item *item,
const char *page, size_t cnt)
{
u16 cntlid_min;
if (sscanf(page, "%hu\n", &cntlid_min) != 1)
return -EINVAL;
if (cntlid_min == 0)
return -EINVAL;
down_write(&nvmet_config_sem);
if (cntlid_min >= to_subsys(item)->cntlid_max)
goto out_unlock;
to_subsys(item)->cntlid_min = cntlid_min;
up_write(&nvmet_config_sem);
return cnt;
out_unlock:
up_write(&nvmet_config_sem);
return -EINVAL;
}
CONFIGFS_ATTR(nvmet_subsys_, attr_cntlid_min);
static ssize_t nvmet_subsys_attr_cntlid_max_show(struct config_item *item,
char *page)
{
return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->cntlid_max);
}
static ssize_t nvmet_subsys_attr_cntlid_max_store(struct config_item *item,
const char *page, size_t cnt)
{
u16 cntlid_max;
if (sscanf(page, "%hu\n", &cntlid_max) != 1)
return -EINVAL;
if (cntlid_max == 0)
return -EINVAL;
down_write(&nvmet_config_sem);
if (cntlid_max <= to_subsys(item)->cntlid_min)
goto out_unlock;
to_subsys(item)->cntlid_max = cntlid_max;
up_write(&nvmet_config_sem);
return cnt;
out_unlock:
up_write(&nvmet_config_sem);
return -EINVAL;
}
CONFIGFS_ATTR(nvmet_subsys_, attr_cntlid_max);
/*
 * configfs show: print the subsystem's model number followed by a newline.
 * Falls back to NVMET_DEFAULT_CTRL_MODEL when no model has been set.
 * subsys->model is read, and formatted into @page, under rcu_read_lock().
 */
static ssize_t nvmet_subsys_attr_model_show(struct config_item *item,
		char *page)
{
	struct nvmet_subsys *subsys = to_subsys(item);
	struct nvmet_subsys_model *configured;
	char *mn;
	int len;

	rcu_read_lock();
	configured = rcu_dereference(subsys->model);
	if (configured)
		mn = configured->number;
	else
		mn = NVMET_DEFAULT_CTRL_MODEL;
	len = snprintf(page, PAGE_SIZE, "%s\n", mn);
	rcu_read_unlock();

	return len;
}
/*
 * Return true when @c lies in the range 0x20 (space) through 0x7e ('~').
 * See Section 1.5 of NVMe 1.4.
 */
static bool nvmet_is_ascii(const char c)
{
	if (c < 0x20 || c > 0x7e)
		return false;

	return true;
}
/*
 * configfs store: replace the subsystem's model number.
 *
 * The new value is the text of @page up to (not including) the first
 * newline. It must be non-empty and every character must pass
 * nvmet_is_ascii(). A new nvmet_subsys_model is allocated, published with
 * rcu_replace_pointer() while holding nvmet_config_sem (write) and
 * subsys->lock, and the previous model is released with kfree_rcu().
 *
 * The string is copied straight from @page into the new model; no
 * temporary duplicate is allocated, so nothing is left to free on the
 * success path.
 *
 * Returns @count on success, -EINVAL for empty or non-ASCII input,
 * -ENOMEM if the allocation fails.
 */
static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
		const char *page, size_t count)
{
	struct nvmet_subsys *subsys = to_subsys(item);
	struct nvmet_subsys_model *new_model;
	struct nvmet_subsys_model *old_model;
	int pos = 0, len;

	len = strcspn(page, "\n");
	if (!len)
		return -EINVAL;

	for (pos = 0; pos < len; pos++) {
		if (!nvmet_is_ascii(page[pos]))
			return -EINVAL;
	}

	/* kzalloc() zero-fills, so the extra byte NUL-terminates number[]. */
	new_model = kzalloc(sizeof(*new_model) + len + 1, GFP_KERNEL);
	if (!new_model)
		return -ENOMEM;
	memcpy(new_model->number, page, len);

	down_write(&nvmet_config_sem);
	mutex_lock(&subsys->lock);
	old_model = rcu_replace_pointer(subsys->model, new_model,
					mutex_is_locked(&subsys->lock));
	mutex_unlock(&subsys->lock);
	up_write(&nvmet_config_sem);

	/* Readers may still hold the old model under rcu_read_lock(). */
	kfree_rcu(old_model, rcuhead);

	return count;
}
CONFIGFS_ATTR(nvmet_subsys_, attr_model);
static struct configfs_attribute *nvmet_subsys_attrs[] = { static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_allow_any_host, &nvmet_subsys_attr_attr_allow_any_host,
&nvmet_subsys_attr_attr_version, &nvmet_subsys_attr_attr_version,
&nvmet_subsys_attr_attr_serial, &nvmet_subsys_attr_attr_serial,
&nvmet_subsys_attr_attr_cntlid_min,
&nvmet_subsys_attr_attr_cntlid_max,
&nvmet_subsys_attr_attr_model,
NULL, NULL,
}; };
......
...@@ -1289,8 +1289,11 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, ...@@ -1289,8 +1289,11 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
if (!ctrl->sqs) if (!ctrl->sqs)
goto out_free_cqs; goto out_free_cqs;
if (subsys->cntlid_min > subsys->cntlid_max)
goto out_free_cqs;
ret = ida_simple_get(&cntlid_ida, ret = ida_simple_get(&cntlid_ida,
NVME_CNTLID_MIN, NVME_CNTLID_MAX, subsys->cntlid_min, subsys->cntlid_max,
GFP_KERNEL); GFP_KERNEL);
if (ret < 0) { if (ret < 0) {
status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR; status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
...@@ -1438,7 +1441,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, ...@@ -1438,7 +1441,8 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
kfree(subsys); kfree(subsys);
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
subsys->cntlid_min = NVME_CNTLID_MIN;
subsys->cntlid_max = NVME_CNTLID_MAX;
kref_init(&subsys->ref); kref_init(&subsys->ref);
mutex_init(&subsys->lock); mutex_init(&subsys->lock);
...@@ -1457,6 +1461,7 @@ static void nvmet_subsys_free(struct kref *ref) ...@@ -1457,6 +1461,7 @@ static void nvmet_subsys_free(struct kref *ref)
WARN_ON_ONCE(!list_empty(&subsys->namespaces)); WARN_ON_ONCE(!list_empty(&subsys->namespaces));
kfree(subsys->subsysnqn); kfree(subsys->subsysnqn);
kfree_rcu(subsys->model, rcuhead);
kfree(subsys); kfree(subsys);
} }
......
...@@ -485,7 +485,6 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) ...@@ -485,7 +485,6 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work)
out_disable: out_disable:
dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
nvme_uninit_ctrl(&ctrl->ctrl); nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
} }
static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = { static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
...@@ -618,8 +617,6 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, ...@@ -618,8 +617,6 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
dev_info(ctrl->ctrl.device, dev_info(ctrl->ctrl.device,
"new ctrl: \"%s\"\n", ctrl->ctrl.opts->subsysnqn); "new ctrl: \"%s\"\n", ctrl->ctrl.opts->subsysnqn);
nvme_get_ctrl(&ctrl->ctrl);
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed); WARN_ON_ONCE(!changed);
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#define NVMET_ASYNC_EVENTS 4 #define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128 #define NVMET_ERROR_LOG_SLOTS 128
#define NVMET_NO_ERROR_LOC ((u16)-1) #define NVMET_NO_ERROR_LOC ((u16)-1)
#define NVMET_DEFAULT_CTRL_MODEL "Linux"
/* /*
* Supported optional AENs: * Supported optional AENs:
...@@ -202,6 +203,11 @@ struct nvmet_ctrl { ...@@ -202,6 +203,11 @@ struct nvmet_ctrl {
struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS]; struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS];
}; };
/*
 * Model number string of a subsystem, referenced through the __rcu pointer
 * nvmet_subsys.model and released with kfree_rcu().
 */
struct nvmet_subsys_model {
	struct rcu_head rcuhead;	/* used by kfree_rcu() when the model is replaced or the subsystem freed */
	char number[];			/* model string; allocated with room for a trailing NUL */
};
struct nvmet_subsys { struct nvmet_subsys {
enum nvme_subsys_type type; enum nvme_subsys_type type;
...@@ -211,6 +217,8 @@ struct nvmet_subsys { ...@@ -211,6 +217,8 @@ struct nvmet_subsys {
struct list_head namespaces; struct list_head namespaces;
unsigned int nr_namespaces; unsigned int nr_namespaces;
unsigned int max_nsid; unsigned int max_nsid;
u16 cntlid_min;
u16 cntlid_max;
struct list_head ctrls; struct list_head ctrls;
...@@ -227,6 +235,8 @@ struct nvmet_subsys { ...@@ -227,6 +235,8 @@ struct nvmet_subsys {
struct config_group namespaces_group; struct config_group namespaces_group;
struct config_group allowed_hosts_group; struct config_group allowed_hosts_group;
struct nvmet_subsys_model __rcu *model;
}; };
static inline struct nvmet_subsys *to_subsys(struct config_item *item) static inline struct nvmet_subsys *to_subsys(struct config_item *item)
...@@ -279,6 +289,7 @@ struct nvmet_fabrics_ops { ...@@ -279,6 +289,7 @@ struct nvmet_fabrics_ops {
struct nvmet_port *port, char *traddr); struct nvmet_port *port, char *traddr);
u16 (*install_queue)(struct nvmet_sq *nvme_sq); u16 (*install_queue)(struct nvmet_sq *nvme_sq);
void (*discovery_chg)(struct nvmet_port *port); void (*discovery_chg)(struct nvmet_port *port);
u8 (*get_mdts)(const struct nvmet_ctrl *ctrl);
}; };
#define NVMET_MAX_INLINE_BIOVEC 8 #define NVMET_MAX_INLINE_BIOVEC 8
......
...@@ -31,6 +31,9 @@ ...@@ -31,6 +31,9 @@
#define NVMET_RDMA_MAX_INLINE_SGE 4 #define NVMET_RDMA_MAX_INLINE_SGE 4
#define NVMET_RDMA_MAX_INLINE_DATA_SIZE max_t(int, SZ_16K, PAGE_SIZE) #define NVMET_RDMA_MAX_INLINE_DATA_SIZE max_t(int, SZ_16K, PAGE_SIZE)
/* Assume mpsmin == device_page_size == 4KB */
#define NVMET_RDMA_MAX_MDTS 8
struct nvmet_rdma_cmd { struct nvmet_rdma_cmd {
struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1]; struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1];
struct ib_cqe cqe; struct ib_cqe cqe;
...@@ -975,7 +978,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) ...@@ -975,7 +978,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
{ {
struct ib_qp_init_attr qp_attr; struct ib_qp_init_attr qp_attr;
struct nvmet_rdma_device *ndev = queue->dev; struct nvmet_rdma_device *ndev = queue->dev;
int comp_vector, nr_cqe, ret, i; int comp_vector, nr_cqe, ret, i, factor;
/* /*
* Spread the io queues across completion vectors, * Spread the io queues across completion vectors,
...@@ -1008,7 +1011,9 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) ...@@ -1008,7 +1011,9 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
qp_attr.qp_type = IB_QPT_RC; qp_attr.qp_type = IB_QPT_RC;
/* +1 for drain */ /* +1 for drain */
qp_attr.cap.max_send_wr = queue->send_queue_size + 1; qp_attr.cap.max_send_wr = queue->send_queue_size + 1;
qp_attr.cap.max_rdma_ctxs = queue->send_queue_size; factor = rdma_rw_mr_factor(ndev->device, queue->cm_id->port_num,
1 << NVMET_RDMA_MAX_MDTS);
qp_attr.cap.max_rdma_ctxs = queue->send_queue_size * factor;
qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd, qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd,
ndev->device->attrs.max_send_sge); ndev->device->attrs.max_send_sge);
...@@ -1602,6 +1607,11 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, ...@@ -1602,6 +1607,11 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
} }
} }
/*
 * get_mdts op for the RDMA transport.
 *
 * Always reports the compile-time constant NVMET_RDMA_MAX_MDTS; @ctrl is
 * not consulted.
 */
static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
{
	const u8 mdts = NVMET_RDMA_MAX_MDTS;

	return mdts;
}
static const struct nvmet_fabrics_ops nvmet_rdma_ops = { static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.type = NVMF_TRTYPE_RDMA, .type = NVMF_TRTYPE_RDMA,
...@@ -1612,6 +1622,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = { ...@@ -1612,6 +1622,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.queue_response = nvmet_rdma_queue_response, .queue_response = nvmet_rdma_queue_response,
.delete_ctrl = nvmet_rdma_delete_ctrl, .delete_ctrl = nvmet_rdma_delete_ctrl,
.disc_traddr = nvmet_rdma_disc_port_addr, .disc_traddr = nvmet_rdma_disc_port_addr,
.get_mdts = nvmet_rdma_get_mdts,
}; };
static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data) static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data)
......
...@@ -19,6 +19,16 @@ ...@@ -19,6 +19,16 @@
#define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE) #define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE)
/* Define the socket priority to use for connections where it is desirable
 * that the NIC consider performing optimized packet processing or filtering.
 * A non-zero value is sufficient to indicate general consideration of any
 * possible optimization. Making it a module param allows for alternative
 * values that may be unique for some NIC implementations.
 */
static int so_priority;
module_param(so_priority, int, 0644);
MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority");
#define NVMET_TCP_RECV_BUDGET 8 #define NVMET_TCP_RECV_BUDGET 8
#define NVMET_TCP_SEND_BUDGET 8 #define NVMET_TCP_SEND_BUDGET 8
#define NVMET_TCP_IO_WORK_BUDGET 64 #define NVMET_TCP_IO_WORK_BUDGET 64
...@@ -616,7 +626,7 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch) ...@@ -616,7 +626,7 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
return 1; return 1;
} }
static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd) static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
{ {
struct nvmet_tcp_queue *queue = cmd->queue; struct nvmet_tcp_queue *queue = cmd->queue;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
...@@ -626,6 +636,9 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd) ...@@ -626,6 +636,9 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd)
}; };
int ret; int ret;
if (!last_in_batch && cmd->queue->send_list_len)
msg.msg_flags |= MSG_MORE;
ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
if (unlikely(ret <= 0)) if (unlikely(ret <= 0))
return ret; return ret;
...@@ -666,7 +679,7 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue, ...@@ -666,7 +679,7 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue,
} }
if (cmd->state == NVMET_TCP_SEND_DDGST) { if (cmd->state == NVMET_TCP_SEND_DDGST) {
ret = nvmet_try_send_ddgst(cmd); ret = nvmet_try_send_ddgst(cmd, last_in_batch);
if (ret <= 0) if (ret <= 0)
goto done_send; goto done_send;
} }
...@@ -788,7 +801,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) ...@@ -788,7 +801,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
icresp->hdr.pdo = 0; icresp->hdr.pdo = 0;
icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen); icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen);
icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0); icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
icresp->maxdata = cpu_to_le32(0xffff); /* FIXME: support r2t */ icresp->maxdata = cpu_to_le32(0x400000); /* 16M arbitrary limit */
icresp->cpda = 0; icresp->cpda = 0;
if (queue->hdr_digest) if (queue->hdr_digest)
icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE; icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
...@@ -1433,6 +1446,13 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) ...@@ -1433,6 +1446,13 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
if (ret) if (ret)
return ret; return ret;
if (so_priority > 0) {
ret = kernel_setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
(char *)&so_priority, sizeof(so_priority));
if (ret)
return ret;
}
/* Set socket type of service */ /* Set socket type of service */
if (inet->rcv_tos > 0) { if (inet->rcv_tos > 0) {
int tos = inet->rcv_tos; int tos = inet->rcv_tos;
...@@ -1622,6 +1642,15 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport) ...@@ -1622,6 +1642,15 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
goto err_sock; goto err_sock;
} }
if (so_priority > 0) {
ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_PRIORITY,
(char *)&so_priority, sizeof(so_priority));
if (ret) {
pr_err("failed to set SO_PRIORITY sock opt %d\n", ret);
goto err_sock;
}
}
ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr, ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr,
sizeof(port->addr)); sizeof(port->addr));
if (ret) { if (ret) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment