Commit ed565371 authored by Jens Axboe

Merge branch 'nvme-4.15' of git://git.infradead.org/nvme into for-linus

Pull NVMe fixes from Christoph:

"A few more nvme updates for 4.15.  A single small PCIe fix, and a number
of patches for RDMA that are a little larger than what I'd like to see
for -rc2, but they fix important issues seen in the wild."
parents 2967acbb 7e5dd57e
drivers/nvme/host/pci.c

@@ -1759,6 +1759,7 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
 			dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs),
 			dev->host_mem_descs, dev->host_mem_descs_dma);
 	dev->host_mem_descs = NULL;
+	dev->nr_host_mem_descs = 0;
 }
 
 static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
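
Note on the pci.c hunk above: clearing the descriptor count together with the pointer keeps the host-memory-buffer bookkeeping consistent, so a later teardown or re-allocation cannot act on a stale count. A minimal userspace sketch of the same free-and-reset pattern (the struct and names are illustrative, not the driver's):

#include <stdlib.h>

/* Illustrative stand-in for host-memory-buffer bookkeeping. */
struct hmb_state {
	void		**descs;	/* descriptor buffers */
	unsigned int	nr;		/* number of valid entries in descs */
};

static void hmb_free(struct hmb_state *st)
{
	unsigned int i;

	for (i = 0; i < st->nr; i++)
		free(st->descs[i]);
	free(st->descs);

	/* Reset both fields so a repeated teardown is a harmless no-op. */
	st->descs = NULL;
	st->nr = 0;
}
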
drivers/nvme/host/rdma.c

@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <rdma/mr_pool.h>
 #include <linux/err.h>
 #include <linux/string.h>
 #include <linux/atomic.h>
@@ -59,6 +60,9 @@ struct nvme_rdma_request {
 	struct nvme_request	req;
 	struct ib_mr		*mr;
 	struct nvme_rdma_qe	sqe;
+	union nvme_result	result;
+	__le16			status;
+	refcount_t		ref;
 	struct ib_sge		sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
 	u32			num_sge;
 	int			nents;
@@ -73,11 +77,11 @@ struct nvme_rdma_request {
 enum nvme_rdma_queue_flags {
 	NVME_RDMA_Q_ALLOCATED		= 0,
 	NVME_RDMA_Q_LIVE		= 1,
+	NVME_RDMA_Q_TR_READY		= 2,
 };
 
 struct nvme_rdma_queue {
 	struct nvme_rdma_qe	*rsp_ring;
-	atomic_t		sig_count;
 	int			queue_size;
 	size_t			cmnd_capsule_len;
 	struct nvme_rdma_ctrl	*ctrl;
@@ -258,32 +262,6 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
 	return ret;
 }
 
-static int nvme_rdma_reinit_request(void *data, struct request *rq)
-{
-	struct nvme_rdma_ctrl *ctrl = data;
-	struct nvme_rdma_device *dev = ctrl->device;
-	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
-	int ret = 0;
-
-	if (WARN_ON_ONCE(!req->mr))
-		return 0;
-
-	ib_dereg_mr(req->mr);
-
-	req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
-			ctrl->max_fr_pages);
-	if (IS_ERR(req->mr)) {
-		ret = PTR_ERR(req->mr);
-		req->mr = NULL;
-		goto out;
-	}
-
-	req->mr->need_inval = false;
-
-out:
-	return ret;
-}
-
 static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
 		struct request *rq, unsigned int hctx_idx)
 {
@@ -293,9 +271,6 @@ static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
 	struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
 	struct nvme_rdma_device *dev = queue->device;
 
-	if (req->mr)
-		ib_dereg_mr(req->mr);
-
 	nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command),
 			DMA_TO_DEVICE);
 }
@@ -317,21 +292,9 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
 	if (ret)
 		return ret;
 
-	req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
-			ctrl->max_fr_pages);
-	if (IS_ERR(req->mr)) {
-		ret = PTR_ERR(req->mr);
-		goto out_free_qe;
-	}
-
 	req->queue = queue;
 
 	return 0;
-
-out_free_qe:
-	nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command),
-			DMA_TO_DEVICE);
-	return -ENOMEM;
 }
 
 static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -428,10 +391,23 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
 static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 {
-	struct nvme_rdma_device *dev = queue->device;
-	struct ib_device *ibdev = dev->dev;
+	struct nvme_rdma_device *dev;
+	struct ib_device *ibdev;
 
-	rdma_destroy_qp(queue->cm_id);
+	if (!test_and_clear_bit(NVME_RDMA_Q_TR_READY, &queue->flags))
+		return;
+
+	dev = queue->device;
+	ibdev = dev->dev;
+
+	ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
+
+	/*
+	 * The cm_id object might have been destroyed during RDMA connection
+	 * establishment error flow to avoid getting other cma events, thus
+	 * the destruction of the QP shouldn't use rdma_cm API.
+	 */
+	ib_destroy_qp(queue->qp);
 	ib_free_cq(queue->ib_cq);
 
 	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
@@ -440,6 +416,12 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 	nvme_rdma_dev_put(dev);
 }
 
+static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev)
+{
+	return min_t(u32, NVME_RDMA_MAX_SEGMENTS,
+		     ibdev->attrs.max_fast_reg_page_list_len);
+}
+
 static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 {
 	struct ib_device *ibdev;
@@ -482,8 +464,24 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 		goto out_destroy_qp;
 	}
 
+	ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
+			      queue->queue_size,
+			      IB_MR_TYPE_MEM_REG,
+			      nvme_rdma_get_max_fr_pages(ibdev));
+	if (ret) {
+		dev_err(queue->ctrl->ctrl.device,
+			"failed to initialize MR pool sized %d for QID %d\n",
+			queue->queue_size, idx);
+		goto out_destroy_ring;
+	}
+
+	set_bit(NVME_RDMA_Q_TR_READY, &queue->flags);
+
 	return 0;
 
+out_destroy_ring:
+	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
+			    sizeof(struct nvme_completion), DMA_FROM_DEVICE);
 out_destroy_qp:
 	rdma_destroy_qp(queue->cm_id);
 out_destroy_ib_cq:
@@ -510,7 +508,6 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
 	queue->cmnd_capsule_len = sizeof(struct nvme_command);
 	queue->queue_size = queue_size;
-	atomic_set(&queue->sig_count, 0);
 
 	queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
 			RDMA_PS_TCP, IB_QPT_RC);
@@ -546,6 +543,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
 out_destroy_cm_id:
 	rdma_destroy_id(queue->cm_id);
+	nvme_rdma_destroy_queue_ib(queue);
 	return ret;
 }
@@ -756,8 +754,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 	ctrl->device = ctrl->queues[0].device;
 
-	ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS,
-		ctrl->device->dev->attrs.max_fast_reg_page_list_len);
+	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
 
 	if (new) {
 		ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
@@ -771,10 +768,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 			error = PTR_ERR(ctrl->ctrl.admin_q);
 			goto out_free_tagset;
 		}
-	} else {
-		error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
-		if (error)
-			goto out_free_queue;
 	}
 
 	error = nvme_rdma_start_queue(ctrl, 0);
@@ -854,10 +847,6 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
 			goto out_free_tag_set;
 		}
 	} else {
-		ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
-		if (ret)
-			goto out_free_io_queues;
-
 		blk_mq_update_nr_hw_queues(&ctrl->tag_set,
 			ctrl->ctrl.queue_count - 1);
 	}
@@ -1018,8 +1007,18 @@ static void nvme_rdma_memreg_done(struct ib_cq *cq, struct ib_wc *wc)
 static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-	if (unlikely(wc->status != IB_WC_SUCCESS))
+	struct nvme_rdma_request *req =
+		container_of(wc->wr_cqe, struct nvme_rdma_request, reg_cqe);
+	struct request *rq = blk_mq_rq_from_pdu(req);
+
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		nvme_rdma_wr_error(cq, wc, "LOCAL_INV");
+		return;
+	}
+
+	if (refcount_dec_and_test(&req->ref))
+		nvme_end_request(rq, req->status, req->result);
 }
 
 static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
@@ -1030,7 +1029,7 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
 		.opcode		    = IB_WR_LOCAL_INV,
 		.next		    = NULL,
 		.num_sge	    = 0,
-		.send_flags	    = 0,
+		.send_flags	    = IB_SEND_SIGNALED,
 		.ex.invalidate_rkey = req->mr->rkey,
 	};
@@ -1044,22 +1043,15 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
 		struct request *rq)
 {
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
-	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
 	struct nvme_rdma_device *dev = queue->device;
 	struct ib_device *ibdev = dev->dev;
-	int res;
 
 	if (!blk_rq_bytes(rq))
 		return;
 
-	if (req->mr->need_inval && test_bit(NVME_RDMA_Q_LIVE, &req->queue->flags)) {
-		res = nvme_rdma_inv_rkey(queue, req);
-		if (unlikely(res < 0)) {
-			dev_err(ctrl->ctrl.device,
-				"Queueing INV WR for rkey %#x failed (%d)\n",
-				req->mr->rkey, res);
-			nvme_rdma_error_recovery(queue->ctrl);
-		}
+	if (req->mr) {
+		ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
+		req->mr = NULL;
 	}
 
 	ib_dma_unmap_sg(ibdev, req->sg_table.sgl,
@@ -1118,12 +1110,18 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
 	struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
 	int nr;
 
+	req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs);
+	if (WARN_ON_ONCE(!req->mr))
+		return -EAGAIN;
+
 	/*
 	 * Align the MR to a 4K page size to match the ctrl page size and
 	 * the block virtual boundary.
 	 */
 	nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
 	if (unlikely(nr < count)) {
+		ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
+		req->mr = NULL;
 		if (nr < 0)
 			return nr;
 		return -EINVAL;
@@ -1142,8 +1140,6 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
 			IB_ACCESS_REMOTE_READ |
 			IB_ACCESS_REMOTE_WRITE;
 
-	req->mr->need_inval = true;
-
 	sg->addr = cpu_to_le64(req->mr->iova);
 	put_unaligned_le24(req->mr->length, sg->length);
 	put_unaligned_le32(req->mr->rkey, sg->key);
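
With the conversion above, the fast-registration MR is no longer allocated per request: the map path borrows one from the per-QP pool (ib_mr_pool_get) and hands it back on any mapping failure, and the unmap path returns it when the request is torn down, so req->mr is non-NULL exactly while the request owns a registration. A compact, generic C sketch of that borrow/return discipline (a plain free list, not the kernel's locked ib_mr_pool implementation; all names are illustrative):

#include <stddef.h>

struct mr {			/* stand-in for struct ib_mr */
	struct mr *next;
};

struct mr_pool {		/* stand-in for the per-QP rdma_mrs list */
	struct mr *free_list;
};

static struct mr *pool_get(struct mr_pool *p)
{
	struct mr *mr = p->free_list;

	if (mr)
		p->free_list = mr->next;
	return mr;		/* NULL: pool exhausted, caller must back off */
}

static void pool_put(struct mr_pool *p, struct mr *mr)
{
	mr->next = p->free_list;
	p->free_list = mr;
}

/* Mirrors the map path: borrow an MR, return it on any mapping failure. */
static int map_request(struct mr_pool *p, struct mr **out, int map_ok)
{
	struct mr *mr = pool_get(p);

	if (!mr)
		return -1;	/* analogous to the -EAGAIN case above */

	if (!map_ok) {		/* e.g. the registration covered too few pages */
		pool_put(p, mr);
		return -1;
	}

	*out = mr;		/* the request owns the MR until unmap */
	return 0;
}

Sizing the real pool to the queue depth at queue creation (the ib_mr_pool_init call earlier in this diff) bounds MR usage per queue and removes the per-request ib_alloc_mr/ib_dereg_mr churn that the deleted init/exit/reinit paths used to perform.
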
@@ -1163,7 +1159,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 	req->num_sge = 1;
 	req->inline_data = false;
-	req->mr->need_inval = false;
+	refcount_set(&req->ref, 2); /* send and recv completions */
 
 	c->common.flags |= NVME_CMD_SGL_METABUF;
@@ -1200,25 +1196,24 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-	if (unlikely(wc->status != IB_WC_SUCCESS))
-		nvme_rdma_wr_error(cq, wc, "SEND");
-}
+	struct nvme_rdma_qe *qe =
+		container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
+	struct nvme_rdma_request *req =
+		container_of(qe, struct nvme_rdma_request, sqe);
+	struct request *rq = blk_mq_rq_from_pdu(req);
 
-/*
- * We want to signal completion at least every queue depth/2. This returns the
- * largest power of two that is not above half of (queue size + 1) to optimize
- * (avoid divisions).
- */
-static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
-{
-	int limit = 1 << ilog2((queue->queue_size + 1) / 2);
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		nvme_rdma_wr_error(cq, wc, "SEND");
+		return;
+	}
 
-	return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0;
+	if (refcount_dec_and_test(&req->ref))
+		nvme_end_request(rq, req->status, req->result);
 }
 
 static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 		struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
-		struct ib_send_wr *first, bool flush)
+		struct ib_send_wr *first)
 {
 	struct ib_send_wr wr, *bad_wr;
 	int ret;
@@ -1227,31 +1222,12 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 	sge->length = sizeof(struct nvme_command),
 	sge->lkey   = queue->device->pd->local_dma_lkey;
 
-	qe->cqe.done = nvme_rdma_send_done;
-
 	wr.next       = NULL;
 	wr.wr_cqe     = &qe->cqe;
 	wr.sg_list    = sge;
 	wr.num_sge    = num_sge;
 	wr.opcode     = IB_WR_SEND;
-	wr.send_flags = 0;
-
-	/*
-	 * Unsignalled send completions are another giant desaster in the
-	 * IB Verbs spec:  If we don't regularly post signalled sends
-	 * the send queue will fill up and only a QP reset will rescue us.
-	 * Would have been way to obvious to handle this in hardware or
-	 * at least the RDMA stack..
-	 *
-	 * Always signal the flushes. The magic request used for the flush
-	 * sequencer is not allocated in our driver's tagset and it's
-	 * triggered to be freed by blk_cleanup_queue(). So we need to
-	 * always mark it as signaled to ensure that the "wr_cqe", which is
-	 * embedded in request's payload, is not freed when __ib_process_cq()
-	 * calls wr_cqe->done().
-	 */
-	if (nvme_rdma_queue_sig_limit(queue) || flush)
-		wr.send_flags |= IB_SEND_SIGNALED;
+	wr.send_flags = IB_SEND_SIGNALED;
 
 	if (first)
 		first->next = &wr;
@@ -1301,6 +1277,12 @@ static struct blk_mq_tags *nvme_rdma_tagset(struct nvme_rdma_queue *queue)
 	return queue->ctrl->tag_set.tags[queue_idx - 1];
 }
 
+static void nvme_rdma_async_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	if (unlikely(wc->status != IB_WC_SUCCESS))
+		nvme_rdma_wr_error(cq, wc, "ASYNC");
+}
+
 static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
 {
 	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg);
@@ -1319,10 +1301,12 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
 	cmd->common.flags |= NVME_CMD_SGL_METABUF;
 	nvme_rdma_set_sg_null(cmd);
 
+	sqe->cqe.done = nvme_rdma_async_done;
+
 	ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(*cmd),
 			DMA_TO_DEVICE);
 
-	ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL, false);
+	ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL);
 	WARN_ON_ONCE(ret);
 }
@@ -1343,14 +1327,34 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 	}
 	req = blk_mq_rq_to_pdu(rq);
 
-	if (rq->tag == tag)
-		ret = 1;
+	req->status = cqe->status;
+	req->result = cqe->result;
 
-	if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) &&
-	    wc->ex.invalidate_rkey == req->mr->rkey)
-		req->mr->need_inval = false;
+	if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
+		if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) {
+			dev_err(queue->ctrl->ctrl.device,
+				"Bogus remote invalidation for rkey %#x\n",
+				req->mr->rkey);
+			nvme_rdma_error_recovery(queue->ctrl);
+		}
+	} else if (req->mr) {
+		ret = nvme_rdma_inv_rkey(queue, req);
+		if (unlikely(ret < 0)) {
+			dev_err(queue->ctrl->ctrl.device,
+				"Queueing INV WR for rkey %#x failed (%d)\n",
+				req->mr->rkey, ret);
+			nvme_rdma_error_recovery(queue->ctrl);
+		}
+		/* the local invalidation completion will end the request */
+		return 0;
+	}
+
+	if (refcount_dec_and_test(&req->ref)) {
+		if (rq->tag == tag)
+			ret = 1;
+		nvme_end_request(rq, req->status, req->result);
+	}
 
-	nvme_end_request(rq, cqe->status, cqe->result);
 	return ret;
 }
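
The hunk above, together with the SEND and LOCAL_INV completion handlers earlier in this diff, implements a single rule: each request starts with a reference count of two (refcount_set(&req->ref, 2)), the send completion drops one reference, the NVMe response (or the local-invalidation completion, when the rkey still has to be invalidated) drops the other, and whichever side reaches zero calls nvme_end_request(). A standalone C11 sketch of that rule, using <stdatomic.h> in place of the kernel's refcount_t; the names are illustrative:

#include <stdatomic.h>
#include <stdio.h>

struct req {
	atomic_int ref;		/* stands in for refcount_t ref */
	int status;
};

static void start_request(struct req *r)
{
	/* One reference for the send completion, one for the response. */
	atomic_init(&r->ref, 2);
}

static void end_request(struct req *r)
{
	printf("request completed, status %d\n", r->status);
}

/* Called from the SEND completion handler. */
static void send_done(struct req *r)
{
	if (atomic_fetch_sub(&r->ref, 1) == 1)
		end_request(r);
}

/* Called when the response (or the local invalidation completion) arrives. */
static void response_done(struct req *r, int status)
{
	r->status = status;
	if (atomic_fetch_sub(&r->ref, 1) == 1)
		end_request(r);
}

int main(void)
{
	struct req r;

	start_request(&r);
	response_done(&r, 0);	/* the two completions may arrive in either order; */
	send_done(&r);		/* only the second one to run ends the request */
	return 0;
}

Because the send completion now does real work, every send is posted with IB_SEND_SIGNALED, which is why the old "signal at least every queue depth/2" heuristic and its sig_count machinery are removed in the hunks above.
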
@@ -1607,7 +1611,6 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_rdma_qe *sqe = &req->sqe;
 	struct nvme_command *c = sqe->data;
-	bool flush = false;
 	struct ib_device *dev;
 	blk_status_t ret;
 	int err;
@@ -1636,13 +1639,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 		goto err;
 	}
 
+	sqe->cqe.done = nvme_rdma_send_done;
+
 	ib_dma_sync_single_for_device(dev, sqe->dma,
 			sizeof(struct nvme_command), DMA_TO_DEVICE);
 
-	if (req_op(rq) == REQ_OP_FLUSH)
-		flush = true;
 	err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
-			req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
+			req->mr ? &req->reg_wr.wr : NULL);
 	if (unlikely(err)) {
 		nvme_rdma_unmap_data(queue, rq);
 		goto err;
@@ -1790,7 +1793,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
 	.submit_async_event	= nvme_rdma_submit_async_event,
 	.delete_ctrl		= nvme_rdma_delete_ctrl,
 	.get_address		= nvmf_get_address,
-	.reinit_request		= nvme_rdma_reinit_request,
 };
 
 static inline bool