Commit c45b1fa2 authored by Ming Lei's avatar Ming Lei Committed by Jens Axboe

nvme-pci: fix nvme_setup_irqs()

When -ENOSPC is returned from pci_alloc_irq_vectors_affinity(),
we still try to allocate multiple irq vectors again, so the irq queues
actually cover the admin queue. But we don't account for that, so the
number of allocated irq vectors may be the same as the sum of
io_queues[HCTX_TYPE_DEFAULT] and io_queues[HCTX_TYPE_READ]; this is
obviously wrong, and it ultimately breaks nvme_pci_map_queues() and
triggers the warning from pci_irq_get_affinity().

IRQ queues should cover the admin queue; this patch makes this
point explicit in nvme_calc_io_queues().

We got several internal boot-failure reports on aarch64, so please
consider fixing it in v4.20.

Fixes: 6451fe73 ("nvme: fix irq vs io_queue calculations")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Tested-by: fin4478 <fin4478@hotmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent fb865858
...@@ -2041,14 +2041,18 @@ static int nvme_setup_host_mem(struct nvme_dev *dev) ...@@ -2041,14 +2041,18 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
return ret; return ret;
} }
/* irq_queues covers admin queue */
static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues) static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues)
{ {
unsigned int this_w_queues = write_queues; unsigned int this_w_queues = write_queues;
WARN_ON(!irq_queues);
/* /*
* Setup read/write queue split * Setup read/write queue split, assign admin queue one independent
* irq vector if irq_queues is > 1.
*/ */
if (irq_queues == 1) { if (irq_queues <= 2) {
dev->io_queues[HCTX_TYPE_DEFAULT] = 1; dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
dev->io_queues[HCTX_TYPE_READ] = 0; dev->io_queues[HCTX_TYPE_READ] = 0;
return; return;
...@@ -2056,21 +2060,21 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues) ...@@ -2056,21 +2060,21 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues)
/* /*
* If 'write_queues' is set, ensure it leaves room for at least * If 'write_queues' is set, ensure it leaves room for at least
* one read queue * one read queue and one admin queue
*/ */
if (this_w_queues >= irq_queues) if (this_w_queues >= irq_queues)
this_w_queues = irq_queues - 1; this_w_queues = irq_queues - 2;
/* /*
* If 'write_queues' is set to zero, reads and writes will share * If 'write_queues' is set to zero, reads and writes will share
* a queue set. * a queue set.
*/ */
if (!this_w_queues) { if (!this_w_queues) {
dev->io_queues[HCTX_TYPE_DEFAULT] = irq_queues; dev->io_queues[HCTX_TYPE_DEFAULT] = irq_queues - 1;
dev->io_queues[HCTX_TYPE_READ] = 0; dev->io_queues[HCTX_TYPE_READ] = 0;
} else { } else {
dev->io_queues[HCTX_TYPE_DEFAULT] = this_w_queues; dev->io_queues[HCTX_TYPE_DEFAULT] = this_w_queues;
dev->io_queues[HCTX_TYPE_READ] = irq_queues - this_w_queues; dev->io_queues[HCTX_TYPE_READ] = irq_queues - this_w_queues - 1;
} }
} }
...@@ -2095,7 +2099,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) ...@@ -2095,7 +2099,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
this_p_queues = nr_io_queues - 1; this_p_queues = nr_io_queues - 1;
irq_queues = 1; irq_queues = 1;
} else { } else {
irq_queues = nr_io_queues - this_p_queues; irq_queues = nr_io_queues - this_p_queues + 1;
} }
dev->io_queues[HCTX_TYPE_POLL] = this_p_queues; dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
...@@ -2115,8 +2119,9 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) ...@@ -2115,8 +2119,9 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
* If we got a failure and we're down to asking for just * If we got a failure and we're down to asking for just
* 1 + 1 queues, just ask for a single vector. We'll share * 1 + 1 queues, just ask for a single vector. We'll share
* that between the single IO queue and the admin queue. * that between the single IO queue and the admin queue.
* Otherwise, we assign one independent vector to admin queue.
*/ */
if (result >= 0 && irq_queues > 1) if (irq_queues > 1)
irq_queues = irq_sets[0] + irq_sets[1] + 1; irq_queues = irq_sets[0] + irq_sets[1] + 1;
result = pci_alloc_irq_vectors_affinity(pdev, irq_queues, result = pci_alloc_irq_vectors_affinity(pdev, irq_queues,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment