Commit ff8519f9 authored by Sagi Grimberg, committed by Christoph Hellwig

nvme-rdma: implement polling queue map

When nr_poll_queues is specified, set up additional queues with the CQ polling
context IB_POLL_DIRECT (no interrupts) and make sure to set QUEUE_FLAG_POLL
on the connect_q. In addition, add a third queue mapping for the polling
queues.
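
For orientation, here is a minimal userspace sketch, not part of the patch, of
the queue-index layout this implies: index 0 is the admin queue, the default
and write queues follow, and the poll queues occupy the tail. struct opts and
is_poll_queue() are illustrative stand-ins that mirror the queue_count
arithmetic and the new nvme_rdma_poll_queue() helper in the diff below.

	#include <stdbool.h>
	#include <stdio.h>

	struct opts {
		int nr_io_queues;	/* default queues */
		int nr_write_queues;	/* write queues */
		int nr_poll_queues;	/* poll queues */
	};

	/* Mirrors nvme_rdma_poll_queue(): everything past default + write polls. */
	static bool is_poll_queue(const struct opts *opts, int idx)
	{
		return idx > opts->nr_io_queues + opts->nr_write_queues;
	}

	int main(void)
	{
		struct opts opts = { .nr_io_queues = 4, .nr_write_queues = 2,
				     .nr_poll_queues = 2 };
		/* Same arithmetic as the queue_count change in the patch. */
		int queue_count = opts.nr_io_queues + opts.nr_write_queues +
				  opts.nr_poll_queues + 1;

		for (int idx = 1; idx < queue_count; idx++)
			printf("queue %d: %s\n", idx,
			       is_poll_queue(&opts, idx) ?
			       "poll (IB_POLL_DIRECT)" : "irq-driven");
		return 0;
	}

With 4 default, 2 write, and 2 poll queues this prints indices 7 and 8 as
polling, matching the "greater than default + write" test in the patch.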

The nvmf connect on such a queue is polled for like any other request, so
make nvmf_connect_io_queue poll when connecting polling queues.
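
Why the connect itself must poll: with IB_POLL_DIRECT the CQ is never armed
for interrupts, so the connect reply only surfaces when something drives the
CQ. A minimal sketch of that shape, where fake_cq and process_cq_direct() are
hypothetical stand-ins for the real CQ and ib_process_cq_direct():

	#include <stdio.h>

	struct fake_cq {
		int pending;	/* completions queued but not yet reaped */
	};

	/* Stand-in for ib_process_cq_direct(): reap up to budget entries,
	 * or everything pending when budget is negative. */
	static int process_cq_direct(struct fake_cq *cq, int budget)
	{
		int reaped = 0;

		while (cq->pending > 0 && (budget < 0 || reaped < budget)) {
			cq->pending--;
			reaped++;
		}
		return reaped;
	}

	/* Polled wait: no interrupt will ever fire, so spin on the CQ.
	 * (The kernel's polled connect path also handles timeout/abort.) */
	static void poll_for_reply(struct fake_cq *cq)
	{
		while (process_cq_direct(cq, -1) == 0)
			;
	}

	int main(void)
	{
		struct fake_cq cq = { .pending = 1 }; /* connect reply arrives */

		poll_for_reply(&cq);
		printf("connect completed via direct polling\n");
		return 0;
	}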
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent 89d43802
@@ -162,6 +162,13 @@ static inline int nvme_rdma_queue_idx(struct nvme_rdma_queue *queue)
 	return queue - queue->ctrl->queues;
 }
 
+static bool nvme_rdma_poll_queue(struct nvme_rdma_queue *queue)
+{
+	return nvme_rdma_queue_idx(queue) >
+		queue->ctrl->ctrl.opts->nr_io_queues +
+		queue->ctrl->ctrl.opts->nr_write_queues;
+}
+
 static inline size_t nvme_rdma_inline_data_size(struct nvme_rdma_queue *queue)
 {
 	return queue->cmnd_capsule_len - sizeof(struct nvme_command);
@@ -440,6 +447,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 	const int send_wr_factor = 3;			/* MR, SEND, INV */
 	const int cq_factor = send_wr_factor + 1;	/* + RECV */
 	int comp_vector, idx = nvme_rdma_queue_idx(queue);
+	enum ib_poll_context poll_ctx;
 	int ret;
 
 	queue->device = nvme_rdma_find_get_device(queue->cm_id);
@@ -456,10 +464,16 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 	 */
 	comp_vector = idx == 0 ? idx : idx - 1;
 
+	/* Polling queues need direct cq polling context */
+	if (nvme_rdma_poll_queue(queue))
+		poll_ctx = IB_POLL_DIRECT;
+	else
+		poll_ctx = IB_POLL_SOFTIRQ;
+
 	/* +1 for ib_stop_cq */
 	queue->ib_cq = ib_alloc_cq(ibdev, queue,
 				cq_factor * queue->queue_size + 1,
-				comp_vector, IB_POLL_SOFTIRQ);
+				comp_vector, poll_ctx);
 	if (IS_ERR(queue->ib_cq)) {
 		ret = PTR_ERR(queue->ib_cq);
 		goto out_put_dev;
@@ -595,15 +609,17 @@ static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl)
 static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
 {
+	struct nvme_rdma_queue *queue = &ctrl->queues[idx];
+	bool poll = nvme_rdma_poll_queue(queue);
 	int ret;
 
 	if (idx)
-		ret = nvmf_connect_io_queue(&ctrl->ctrl, idx, false);
+		ret = nvmf_connect_io_queue(&ctrl->ctrl, idx, poll);
 	else
 		ret = nvmf_connect_admin_queue(&ctrl->ctrl);
 
 	if (!ret)
-		set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[idx].flags);
+		set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
 	else
 		dev_info(ctrl->ctrl.device,
 			"failed to connect queue: %d ret=%d\n", idx, ret);
@@ -646,6 +662,7 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
 			ibdev->num_comp_vectors);
 
 	nr_io_queues += min(opts->nr_write_queues, num_online_cpus());
+	nr_io_queues += min(opts->nr_poll_queues, num_online_cpus());
 	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
 	if (ret)
@@ -716,7 +733,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 		set->driver_data = ctrl;
 		set->nr_hw_queues = nctrl->queue_count - 1;
 		set->timeout = NVME_IO_TIMEOUT;
-		set->nr_maps = 2 /* default + read */;
+		set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2;
 	}
 
 	ret = blk_mq_alloc_tag_set(set);
@@ -1742,6 +1759,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return BLK_STS_IOERR;
 }
 
+static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
+{
+	struct nvme_rdma_queue *queue = hctx->driver_data;
+
+	return ib_process_cq_direct(queue->ib_cq, -1);
+}
+
 static void nvme_rdma_complete_rq(struct request *rq)
 {
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
@@ -1772,6 +1796,17 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
 			ctrl->device->dev, 0);
 	blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ],
 			ctrl->device->dev, 0);
+	if (ctrl->ctrl.opts->nr_poll_queues) {
+		set->map[HCTX_TYPE_POLL].nr_queues =
+				ctrl->ctrl.opts->nr_poll_queues;
+		set->map[HCTX_TYPE_POLL].queue_offset =
+				ctrl->ctrl.opts->nr_io_queues;
+		if (ctrl->ctrl.opts->nr_write_queues)
+			set->map[HCTX_TYPE_POLL].queue_offset +=
+				ctrl->ctrl.opts->nr_write_queues;
+
+		blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
+	}
 	return 0;
 }
@@ -1783,6 +1818,7 @@ static const struct blk_mq_ops nvme_rdma_mq_ops = {
 	.init_hctx	= nvme_rdma_init_hctx,
 	.timeout	= nvme_rdma_timeout,
 	.map_queues	= nvme_rdma_map_queues,
+	.poll		= nvme_rdma_poll,
 };
 
 static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
@@ -1927,7 +1963,8 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 	INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
 	INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);
 
-	ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues + 1;
+	ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
+				opts->nr_poll_queues + 1;
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
 	ctrl->ctrl.kato = opts->kato;
@@ -1979,7 +2016,7 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
 	.required_opts	= NVMF_OPT_TRADDR,
 	.allowed_opts	= NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
 			  NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
-			  NVMF_OPT_NR_WRITE_QUEUES,
+			  NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES,
 	.create_ctrl	= nvme_rdma_create_ctrl,
 };