Commit 5cb525c8, authored by Jens Axboe, committed by Keith Busch

nvme-pci: handle completions outside of the queue lock

Split the completion of events into a two-part process:

1) Reap the events inside the queue lock
2) Complete the events outside the queue lock

Since we never wrap the queue, we can access it locklessly after we've
updated the completion queue head. This patch started off with batching
events on the stack, but with this trick we don't have to. Keith Busch
<keith.busch@intel.com> came up with that idea.

Note that this kills the ->cqe_seen as well. I haven't been able to
trigger any ill effects of this. If we do race with polling every so
often, it should be rare enough NOT to trigger any issues.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Keith Busch <keith.busch@intel.com>
[hch: refactored, restored poll early exit optimization]
Signed-off-by: Christoph Hellwig <hch@lst.de>
parent d1f06f4a
......@@ -161,7 +161,6 @@ struct nvme_queue {
u16 cq_head;
u16 qid;
u8 cq_phase;
u8 cqe_seen;
u32 *dbbuf_sq_db;
u32 *dbbuf_cq_db;
u32 *dbbuf_sq_ei;
......@@ -932,9 +931,9 @@ static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
}
}
static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
struct nvme_completion *cqe)
static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
{
volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
struct request *req;
if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
......@@ -957,50 +956,58 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
return;
}
nvmeq->cqe_seen = 1;
req = blk_mq_tag_to_rq(*nvmeq->tags, cqe->command_id);
nvme_end_request(req, cqe->status, cqe->result);
}
static inline bool nvme_read_cqe(struct nvme_queue *nvmeq,
struct nvme_completion *cqe)
static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
{
if (nvme_cqe_pending(nvmeq)) {
*cqe = nvmeq->cqes[nvmeq->cq_head];
while (start != end) {
nvme_handle_cqe(nvmeq, start);
if (++start == nvmeq->q_depth)
start = 0;
}
}
if (++nvmeq->cq_head == nvmeq->q_depth) {
nvmeq->cq_head = 0;
nvmeq->cq_phase = !nvmeq->cq_phase;
}
return true;
static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
{
if (++nvmeq->cq_head == nvmeq->q_depth) {
nvmeq->cq_head = 0;
nvmeq->cq_phase = !nvmeq->cq_phase;
}
return false;
}
static void nvme_process_cq(struct nvme_queue *nvmeq)
static inline bool nvme_process_cq(struct nvme_queue *nvmeq, u16 *start,
u16 *end, int tag)
{
struct nvme_completion cqe;
int consumed = 0;
bool found = false;
while (nvme_read_cqe(nvmeq, &cqe)) {
nvme_handle_cqe(nvmeq, &cqe);
consumed++;
*start = nvmeq->cq_head;
while (!found && nvme_cqe_pending(nvmeq)) {
if (nvmeq->cqes[nvmeq->cq_head].command_id == tag)
found = true;
nvme_update_cq_head(nvmeq);
}
*end = nvmeq->cq_head;
if (consumed)
if (*start != *end)
nvme_ring_cq_doorbell(nvmeq);
return found;
}
static irqreturn_t nvme_irq(int irq, void *data)
{
irqreturn_t result;
struct nvme_queue *nvmeq = data;
u16 start, end;
spin_lock(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
result = nvmeq->cqe_seen ? IRQ_HANDLED : IRQ_NONE;
nvmeq->cqe_seen = 0;
nvme_process_cq(nvmeq, &start, &end, -1);
spin_unlock(&nvmeq->q_lock);
return result;
if (start == end)
return IRQ_NONE;
nvme_complete_cqes(nvmeq, start, end);
return IRQ_HANDLED;
}
static irqreturn_t nvme_irq_check(int irq, void *data)
......@@ -1013,27 +1020,17 @@ static irqreturn_t nvme_irq_check(int irq, void *data)
static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag)
{
struct nvme_completion cqe;
int found = 0, consumed = 0;
u16 start, end;
bool found;
if (!nvme_cqe_pending(nvmeq))
return 0;
spin_lock_irq(&nvmeq->q_lock);
while (nvme_read_cqe(nvmeq, &cqe)) {
nvme_handle_cqe(nvmeq, &cqe);
consumed++;
if (tag == cqe.command_id) {
found = 1;
break;
}
}
if (consumed)
nvme_ring_cq_doorbell(nvmeq);
found = nvme_process_cq(nvmeq, &start, &end, tag);
spin_unlock_irq(&nvmeq->q_lock);
nvme_complete_cqes(nvmeq, start, end);
return found;
}
......@@ -1340,6 +1337,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
{
struct nvme_queue *nvmeq = &dev->queues[0];
u16 start, end;
if (shutdown)
nvme_shutdown_ctrl(&dev->ctrl);
......@@ -1347,8 +1345,10 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
spin_lock_irq(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
nvme_process_cq(nvmeq, &start, &end, -1);
spin_unlock_irq(&nvmeq->q_lock);
nvme_complete_cqes(nvmeq, start, end);
}
static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
......@@ -1995,6 +1995,7 @@ static void nvme_del_queue_end(struct request *req, blk_status_t error)
static void nvme_del_cq_end(struct request *req, blk_status_t error)
{
struct nvme_queue *nvmeq = req->end_io_data;
u16 start, end;
if (!error) {
unsigned long flags;
......@@ -2006,8 +2007,10 @@ static void nvme_del_cq_end(struct request *req, blk_status_t error)
*/
spin_lock_irqsave_nested(&nvmeq->q_lock, flags,
SINGLE_DEPTH_NESTING);
nvme_process_cq(nvmeq);
nvme_process_cq(nvmeq, &start, &end, -1);
spin_unlock_irqrestore(&nvmeq->q_lock, flags);
nvme_complete_cqes(nvmeq, start, end);
}
nvme_del_queue_end(req, error);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment