Commit 1052b8ac authored by Jens Axboe

blk-mq: when polling for IO, look for any completion

If we want to support async IO polling, then we have to allow finding
completions that aren't just for the one we are looking for. Always pass
in -1 to the mq_ops->poll() helper, and have that return how many events
were found in this poll loop.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 1db4909e
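For readers of the interface change, here is a minimal caller-side sketch (hypothetical, not part of this commit) of the new blk_poll() contract: the function now returns the number of completions it reaped instead of a bool, and a poll pass may complete requests other than the one named by the cookie, so a caller has to check its own IO's state rather than rely on the return value alone. The helper name and the io_done() callback below are made up for illustration.

        /* Busy-poll until the caller's own IO is observed complete. */
        static void poll_until_complete(struct request_queue *q, blk_qc_t cookie,
                                        bool (*io_done)(void *arg), void *arg)
        {
                while (!io_done(arg)) {
                        /* blk_poll() returns how many completions it found this pass. */
                        if (blk_poll(q, cookie) == 0)
                                cpu_relax();
                }
        }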
@@ -1273,10 +1273,19 @@ blk_qc_t submit_bio(struct bio *bio)
 }
 EXPORT_SYMBOL(submit_bio);
 
-bool blk_poll(struct request_queue *q, blk_qc_t cookie)
+/**
+ * blk_poll - poll for IO completions
+ * @q:  the queue
+ * @cookie: cookie passed back at IO submission time
+ *
+ * Description:
+ *    Poll for completions on the passed in queue. Returns number of
+ *    completed entries found.
+ */
+int blk_poll(struct request_queue *q, blk_qc_t cookie)
 {
         if (!q->poll_fn || !blk_qc_t_valid(cookie))
-                return false;
+                return 0;
 
         if (current->plug)
                 blk_flush_plug_list(current->plug, false);
...
@@ -3285,15 +3285,12 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
                 return false;
 
         /*
-         * poll_nsec can be:
+         * If we get here, hybrid polling is enabled. Hence poll_nsec can be:
          *
-         * -1:  don't ever hybrid sleep
          *  0:  use half of prev avg
          * >0:  use this specific value
          */
-        if (q->poll_nsec == -1)
-                return false;
-        else if (q->poll_nsec > 0)
+        if (q->poll_nsec > 0)
                 nsecs = q->poll_nsec;
         else
                 nsecs = blk_mq_poll_nsecs(q, hctx, rq);
@@ -3330,11 +3327,41 @@ static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
         return true;
 }
 
-static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
+static bool blk_mq_poll_hybrid(struct request_queue *q,
+                               struct blk_mq_hw_ctx *hctx, blk_qc_t cookie)
 {
-        struct request_queue *q = hctx->queue;
+        struct request *rq;
+
+        if (q->poll_nsec == -1)
+                return false;
+
+        if (!blk_qc_t_is_internal(cookie))
+                rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
+        else {
+                rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
+                /*
+                 * With scheduling, if the request has completed, we'll
+                 * get a NULL return here, as we clear the sched tag when
+                 * that happens. The request still remains valid, like always,
+                 * so we should be safe with just the NULL check.
+                 */
+                if (!rq)
+                        return false;
+        }
+
+        return blk_mq_poll_hybrid_sleep(q, hctx, rq);
+}
+
+static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
+{
+        struct blk_mq_hw_ctx *hctx;
         long state;
 
+        if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+                return 0;
+
+        hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
+
         /*
          * If we sleep, have the caller restart the poll loop to reset
          * the state. Like for the other success return cases, the
@@ -3342,7 +3369,7 @@ static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
          * the IO isn't complete, we'll get called again and will go
          * straight to the busy poll loop.
          */
-        if (blk_mq_poll_hybrid_sleep(q, hctx, rq))
+        if (blk_mq_poll_hybrid(q, hctx, cookie))
                 return 1;
 
         hctx->poll_considered++;
@@ -3353,7 +3380,7 @@ static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
 
                 hctx->poll_invoked++;
 
-                ret = q->mq_ops->poll(hctx, rq->tag);
+                ret = q->mq_ops->poll(hctx, -1U);
                 if (ret > 0) {
                         hctx->poll_success++;
                         __set_current_state(TASK_RUNNING);
@@ -3374,32 +3401,6 @@ static int __blk_mq_poll(struct blk_mq_hw_ctx *hctx, struct request *rq)
         return 0;
 }
 
-static int blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
-{
-        struct blk_mq_hw_ctx *hctx;
-        struct request *rq;
-
-        if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
-                return 0;
-
-        hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
-        if (!blk_qc_t_is_internal(cookie))
-                rq = blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(cookie));
-        else {
-                rq = blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(cookie));
-                /*
-                 * With scheduling, if the request has completed, we'll
-                 * get a NULL return here, as we clear the sched tag when
-                 * that happens. The request still remains valid, like always,
-                 * so we should be safe with just the NULL check.
-                 */
-                if (!rq)
-                        return 0;
-        }
-
-        return __blk_mq_poll(hctx, rq);
-}
-
 unsigned int blk_mq_rq_cpu(struct request *rq)
 {
         return rq->mq_ctx->cpu;
...
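Below the block layer changes, the drivers are updated to the new ->poll() convention. As a hedged illustration of what blk-mq now expects from a driver's poll handler (the mydrv_* names are hypothetical; the real examples are the nvme hunks that follow): a tag of -1U means "reap anything that has completed", and the return value is the count of completions found.

        static int mydrv_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
        {
                struct mydrv_queue *mq = hctx->driver_data;
                int found = 0;

                /* Drain the completion ring, completing entries as we go. */
                while (mydrv_cqe_pending(mq)) {
                        unsigned int done_tag = mydrv_reap_one(mq);

                        /* -1U matches any tag; otherwise count only our own IO. */
                        if (tag == -1U || done_tag == tag)
                                found++;
                }
                return found;
        }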
@@ -1012,15 +1012,15 @@ static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
         }
 }
 
-static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
-                u16 *end, int tag)
+static inline int nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
+                                  u16 *end, unsigned int tag)
 {
-        bool found = false;
+        int found = 0;
 
         *start = nvmeq->cq_head;
-        while (!found && nvme_cqe_pending(nvmeq)) {
-                if (nvmeq->cqes[nvmeq->cq_head].command_id == tag)
-                        found = true;
+        while (nvme_cqe_pending(nvmeq)) {
+                if (tag == -1U || nvmeq->cqes[nvmeq->cq_head].command_id == tag)
+                        found++;
                 nvme_update_cq_head(nvmeq);
         }
         *end = nvmeq->cq_head;
@@ -1062,7 +1062,7 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
 static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
 {
         u16 start, end;
-        bool found;
+        int found;
 
         if (!nvme_cqe_pending(nvmeq))
                 return 0;
...
@@ -1409,12 +1409,11 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
         WARN_ON_ONCE(ret);
 }
 
-static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
-                struct nvme_completion *cqe, struct ib_wc *wc, int tag)
+static void nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
+                struct nvme_completion *cqe, struct ib_wc *wc)
 {
         struct request *rq;
         struct nvme_rdma_request *req;
-        int ret = 0;
 
         rq = blk_mq_tag_to_rq(nvme_rdma_tagset(queue), cqe->command_id);
         if (!rq) {
@@ -1422,7 +1421,7 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
                         "tag 0x%x on QP %#x not found\n",
                         cqe->command_id, queue->qp->qp_num);
                 nvme_rdma_error_recovery(queue->ctrl);
-                return ret;
+                return;
         }
 
         req = blk_mq_rq_to_pdu(rq);
@@ -1437,6 +1436,8 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
                         nvme_rdma_error_recovery(queue->ctrl);
                 }
         } else if (req->mr) {
+                int ret;
+
                 ret = nvme_rdma_inv_rkey(queue, req);
                 if (unlikely(ret < 0)) {
                         dev_err(queue->ctrl->ctrl.device,
@@ -1445,19 +1446,14 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
                         nvme_rdma_error_recovery(queue->ctrl);
                 }
                 /* the local invalidation completion will end the request */
-                return 0;
+                return;
         }
 
-        if (refcount_dec_and_test(&req->ref)) {
-                if (rq->tag == tag)
-                        ret = 1;
+        if (refcount_dec_and_test(&req->ref))
                 nvme_end_request(rq, req->status, req->result);
-        }
-
-        return ret;
 }
 
-static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
+static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
         struct nvme_rdma_qe *qe =
                 container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
@@ -1465,11 +1461,10 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
         struct ib_device *ibdev = queue->device->dev;
         struct nvme_completion *cqe = qe->data;
         const size_t len = sizeof(struct nvme_completion);
-        int ret = 0;
 
         if (unlikely(wc->status != IB_WC_SUCCESS)) {
                 nvme_rdma_wr_error(cq, wc, "RECV");
-                return 0;
+                return;
         }
 
         ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE);
@@ -1484,16 +1479,10 @@ static int __nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc, int tag)
                 nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
                                 &cqe->result);
         else
-                ret = nvme_rdma_process_nvme_rsp(queue, cqe, wc, tag);
+                nvme_rdma_process_nvme_rsp(queue, cqe, wc);
         ib_dma_sync_single_for_device(ibdev, qe->dma, len, DMA_FROM_DEVICE);
 
         nvme_rdma_post_recv(queue, qe);
-        return ret;
-}
-
-static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
-{
-        __nvme_rdma_recv_done(cq, wc, -1);
 }
 
 static int nvme_rdma_conn_established(struct nvme_rdma_queue *queue)
@@ -1758,10 +1747,12 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
                 struct ib_cqe *cqe = wc.wr_cqe;
 
                 if (cqe) {
-                        if (cqe->done == nvme_rdma_recv_done)
-                                found |= __nvme_rdma_recv_done(cq, &wc, tag);
-                        else
+                        if (cqe->done == nvme_rdma_recv_done) {
+                                nvme_rdma_recv_done(cq, &wc);
+                                found++;
+                        } else {
                                 cqe->done(cq, &wc);
+                        }
                 }
         }
 
...
@@ -867,7 +867,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
 int blk_status_to_errno(blk_status_t status);
 blk_status_t errno_to_blk_status(int errno);
 
-bool blk_poll(struct request_queue *q, blk_qc_t cookie);
+int blk_poll(struct request_queue *q, blk_qc_t cookie);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 {
...