Commit e20ba6e1, authored by Christoph Hellwig, committed by Jens Axboe

block: move queue types to the block layer

Having another indirect call in the fast path doesn't really help
in our post-spectre world.  Also, having too many queue types is just
going to create confusion, so I'd rather manage them centrally.

Note that the queue type naming and ordering change a bit: the
first index is now the default queue for everything not explicitly
marked; the optional ones are the read and poll queues.
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 154989e4
...@@ -173,9 +173,16 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) ...@@ -173,9 +173,16 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
return ret; return ret;
} }
static const char *const hctx_types[] = {
[HCTX_TYPE_DEFAULT] = "default",
[HCTX_TYPE_READ] = "read",
[HCTX_TYPE_POLL] = "poll",
};
static ssize_t blk_mq_hw_sysfs_type_show(struct blk_mq_hw_ctx *hctx, char *page) static ssize_t blk_mq_hw_sysfs_type_show(struct blk_mq_hw_ctx *hctx, char *page)
{ {
return sprintf(page, "%u\n", hctx->type); BUILD_BUG_ON(ARRAY_SIZE(hctx_types) != HCTX_MAX_TYPES);
return sprintf(page, "%s\n", hctx_types[hctx->type]);
} }
static struct attribute *default_ctx_attrs[] = { static struct attribute *default_ctx_attrs[] = {
......
...@@ -81,16 +81,14 @@ extern int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int); ...@@ -81,16 +81,14 @@ extern int blk_mq_hw_queue_to_node(struct blk_mq_queue_map *qmap, unsigned int);
/* /*
* blk_mq_map_queue_type() - map (hctx_type,cpu) to hardware queue * blk_mq_map_queue_type() - map (hctx_type,cpu) to hardware queue
* @q: request queue * @q: request queue
* @hctx_type: the hctx type index * @type: the hctx type index
* @cpu: CPU * @cpu: CPU
*/ */
static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q, static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *q,
unsigned int hctx_type, enum hctx_type type,
unsigned int cpu) unsigned int cpu)
{ {
struct blk_mq_tag_set *set = q->tag_set; return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]];
return q->queue_hw_ctx[set->map[hctx_type].mq_map[cpu]];
} }
/* /*
...@@ -103,12 +101,17 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, ...@@ -103,12 +101,17 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
unsigned int flags, unsigned int flags,
unsigned int cpu) unsigned int cpu)
{ {
int hctx_type = 0; enum hctx_type type = HCTX_TYPE_DEFAULT;
if (q->tag_set->nr_maps > HCTX_TYPE_POLL &&
((flags & REQ_HIPRI) && test_bit(QUEUE_FLAG_POLL, &q->queue_flags)))
type = HCTX_TYPE_POLL;
if (q->mq_ops->rq_flags_to_type) else if (q->tag_set->nr_maps > HCTX_TYPE_READ &&
hctx_type = q->mq_ops->rq_flags_to_type(q, flags); ((flags & REQ_OP_MASK) == REQ_OP_READ))
type = HCTX_TYPE_READ;
return blk_mq_map_queue_type(q, hctx_type, cpu); return blk_mq_map_queue_type(q, type, cpu);
} }
/* /*
......
...@@ -95,13 +95,6 @@ struct nvme_queue; ...@@ -95,13 +95,6 @@ struct nvme_queue;
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
enum {
NVMEQ_TYPE_READ,
NVMEQ_TYPE_WRITE,
NVMEQ_TYPE_POLL,
NVMEQ_TYPE_NR,
};
/* /*
* Represents an NVM Express device. Each nvme_dev is a PCI function. * Represents an NVM Express device. Each nvme_dev is a PCI function.
*/ */
...@@ -115,7 +108,7 @@ struct nvme_dev { ...@@ -115,7 +108,7 @@ struct nvme_dev {
struct dma_pool *prp_small_pool; struct dma_pool *prp_small_pool;
unsigned online_queues; unsigned online_queues;
unsigned max_qid; unsigned max_qid;
unsigned io_queues[NVMEQ_TYPE_NR]; unsigned io_queues[HCTX_MAX_TYPES];
unsigned int num_vecs; unsigned int num_vecs;
int q_depth; int q_depth;
u32 db_stride; u32 db_stride;
...@@ -499,10 +492,10 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) ...@@ -499,10 +492,10 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
map->nr_queues = dev->io_queues[i]; map->nr_queues = dev->io_queues[i];
if (!map->nr_queues) { if (!map->nr_queues) {
BUG_ON(i == NVMEQ_TYPE_READ); BUG_ON(i == HCTX_TYPE_DEFAULT);
/* shared set, resuse read set parameters */ /* shared set, resuse read set parameters */
map->nr_queues = dev->io_queues[NVMEQ_TYPE_READ]; map->nr_queues = dev->io_queues[HCTX_TYPE_DEFAULT];
qoff = 0; qoff = 0;
offset = queue_irq_offset(dev); offset = queue_irq_offset(dev);
} }
...@@ -512,7 +505,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) ...@@ -512,7 +505,7 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
* affinity), so use the regular blk-mq cpu mapping * affinity), so use the regular blk-mq cpu mapping
*/ */
map->queue_offset = qoff; map->queue_offset = qoff;
if (i != NVMEQ_TYPE_POLL) if (i != HCTX_TYPE_POLL)
blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset); blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset);
else else
blk_mq_map_queues(map); blk_mq_map_queues(map);
...@@ -961,16 +954,6 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -961,16 +954,6 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
return ret; return ret;
} }
static int nvme_rq_flags_to_type(struct request_queue *q, unsigned int flags)
{
if ((flags & REQ_HIPRI) && test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
return NVMEQ_TYPE_POLL;
if ((flags & REQ_OP_MASK) == REQ_OP_READ)
return NVMEQ_TYPE_READ;
return NVMEQ_TYPE_WRITE;
}
static void nvme_pci_complete_rq(struct request *req) static void nvme_pci_complete_rq(struct request *req)
{ {
struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
...@@ -1634,7 +1617,6 @@ static const struct blk_mq_ops nvme_mq_admin_ops = { ...@@ -1634,7 +1617,6 @@ static const struct blk_mq_ops nvme_mq_admin_ops = {
#define NVME_SHARED_MQ_OPS \ #define NVME_SHARED_MQ_OPS \
.queue_rq = nvme_queue_rq, \ .queue_rq = nvme_queue_rq, \
.commit_rqs = nvme_commit_rqs, \ .commit_rqs = nvme_commit_rqs, \
.rq_flags_to_type = nvme_rq_flags_to_type, \
.complete = nvme_pci_complete_rq, \ .complete = nvme_pci_complete_rq, \
.init_hctx = nvme_init_hctx, \ .init_hctx = nvme_init_hctx, \
.init_request = nvme_init_request, \ .init_request = nvme_init_request, \
...@@ -1785,9 +1767,9 @@ static int nvme_create_io_queues(struct nvme_dev *dev) ...@@ -1785,9 +1767,9 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
} }
max = min(dev->max_qid, dev->ctrl.queue_count - 1); max = min(dev->max_qid, dev->ctrl.queue_count - 1);
if (max != 1 && dev->io_queues[NVMEQ_TYPE_POLL]) { if (max != 1 && dev->io_queues[HCTX_TYPE_POLL]) {
rw_queues = dev->io_queues[NVMEQ_TYPE_READ] + rw_queues = dev->io_queues[HCTX_TYPE_DEFAULT] +
dev->io_queues[NVMEQ_TYPE_WRITE]; dev->io_queues[HCTX_TYPE_READ];
} else { } else {
rw_queues = max; rw_queues = max;
} }
...@@ -2076,9 +2058,9 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int nr_io_queues) ...@@ -2076,9 +2058,9 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int nr_io_queues)
* Setup read/write queue split * Setup read/write queue split
*/ */
if (nr_io_queues == 1) { if (nr_io_queues == 1) {
dev->io_queues[NVMEQ_TYPE_READ] = 1; dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
dev->io_queues[NVMEQ_TYPE_WRITE] = 0; dev->io_queues[HCTX_TYPE_READ] = 0;
dev->io_queues[NVMEQ_TYPE_POLL] = 0; dev->io_queues[HCTX_TYPE_POLL] = 0;
return; return;
} }
...@@ -2095,10 +2077,10 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int nr_io_queues) ...@@ -2095,10 +2077,10 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int nr_io_queues)
this_p_queues = nr_io_queues - 1; this_p_queues = nr_io_queues - 1;
} }
dev->io_queues[NVMEQ_TYPE_POLL] = this_p_queues; dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
nr_io_queues -= this_p_queues; nr_io_queues -= this_p_queues;
} else } else
dev->io_queues[NVMEQ_TYPE_POLL] = 0; dev->io_queues[HCTX_TYPE_POLL] = 0;
/* /*
* If 'write_queues' is set, ensure it leaves room for at least * If 'write_queues' is set, ensure it leaves room for at least
...@@ -2112,11 +2094,11 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int nr_io_queues) ...@@ -2112,11 +2094,11 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int nr_io_queues)
* a queue set. * a queue set.
*/ */
if (!this_w_queues) { if (!this_w_queues) {
dev->io_queues[NVMEQ_TYPE_WRITE] = 0; dev->io_queues[HCTX_TYPE_DEFAULT] = nr_io_queues;
dev->io_queues[NVMEQ_TYPE_READ] = nr_io_queues; dev->io_queues[HCTX_TYPE_READ] = 0;
} else { } else {
dev->io_queues[NVMEQ_TYPE_WRITE] = this_w_queues; dev->io_queues[HCTX_TYPE_DEFAULT] = this_w_queues;
dev->io_queues[NVMEQ_TYPE_READ] = nr_io_queues - this_w_queues; dev->io_queues[HCTX_TYPE_READ] = nr_io_queues - this_w_queues;
} }
} }
...@@ -2138,8 +2120,8 @@ static int nvme_setup_irqs(struct nvme_dev *dev, int nr_io_queues) ...@@ -2138,8 +2120,8 @@ static int nvme_setup_irqs(struct nvme_dev *dev, int nr_io_queues)
*/ */
do { do {
nvme_calc_io_queues(dev, nr_io_queues); nvme_calc_io_queues(dev, nr_io_queues);
irq_sets[0] = dev->io_queues[NVMEQ_TYPE_READ]; irq_sets[0] = dev->io_queues[HCTX_TYPE_DEFAULT];
irq_sets[1] = dev->io_queues[NVMEQ_TYPE_WRITE]; irq_sets[1] = dev->io_queues[HCTX_TYPE_READ];
if (!irq_sets[1]) if (!irq_sets[1])
affd.nr_sets = 1; affd.nr_sets = 1;
...@@ -2226,12 +2208,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) ...@@ -2226,12 +2208,12 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
dev->num_vecs = result; dev->num_vecs = result;
result = max(result - 1, 1); result = max(result - 1, 1);
dev->max_qid = result + dev->io_queues[NVMEQ_TYPE_POLL]; dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL];
dev_info(dev->ctrl.device, "%d/%d/%d read/write/poll queues\n", dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n",
dev->io_queues[NVMEQ_TYPE_READ], dev->io_queues[HCTX_TYPE_DEFAULT],
dev->io_queues[NVMEQ_TYPE_WRITE], dev->io_queues[HCTX_TYPE_READ],
dev->io_queues[NVMEQ_TYPE_POLL]); dev->io_queues[HCTX_TYPE_POLL]);
/* /*
* Should investigate if there's a performance win from allocating * Should investigate if there's a performance win from allocating
...@@ -2332,13 +2314,13 @@ static int nvme_dev_add(struct nvme_dev *dev) ...@@ -2332,13 +2314,13 @@ static int nvme_dev_add(struct nvme_dev *dev)
int ret; int ret;
if (!dev->ctrl.tagset) { if (!dev->ctrl.tagset) {
if (!dev->io_queues[NVMEQ_TYPE_POLL]) if (!dev->io_queues[HCTX_TYPE_POLL])
dev->tagset.ops = &nvme_mq_ops; dev->tagset.ops = &nvme_mq_ops;
else else
dev->tagset.ops = &nvme_mq_poll_noirq_ops; dev->tagset.ops = &nvme_mq_poll_noirq_ops;
dev->tagset.nr_hw_queues = dev->online_queues - 1; dev->tagset.nr_hw_queues = dev->online_queues - 1;
dev->tagset.nr_maps = NVMEQ_TYPE_NR; dev->tagset.nr_maps = HCTX_MAX_TYPES;
dev->tagset.timeout = NVME_IO_TIMEOUT; dev->tagset.timeout = NVME_IO_TIMEOUT;
dev->tagset.numa_node = dev_to_node(dev->dev); dev->tagset.numa_node = dev_to_node(dev->dev);
dev->tagset.queue_depth = dev->tagset.queue_depth =
......
...@@ -81,8 +81,12 @@ struct blk_mq_queue_map { ...@@ -81,8 +81,12 @@ struct blk_mq_queue_map {
unsigned int queue_offset; unsigned int queue_offset;
}; };
enum { enum hctx_type {
HCTX_MAX_TYPES = 3, HCTX_TYPE_DEFAULT, /* all I/O not otherwise accounted for */
HCTX_TYPE_READ, /* just for READ I/O */
HCTX_TYPE_POLL, /* polled I/O of any kind */
HCTX_MAX_TYPES,
}; };
struct blk_mq_tag_set { struct blk_mq_tag_set {
...@@ -118,8 +122,6 @@ struct blk_mq_queue_data { ...@@ -118,8 +122,6 @@ struct blk_mq_queue_data {
typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *, typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *,
const struct blk_mq_queue_data *); const struct blk_mq_queue_data *);
typedef void (commit_rqs_fn)(struct blk_mq_hw_ctx *); typedef void (commit_rqs_fn)(struct blk_mq_hw_ctx *);
/* takes rq->cmd_flags as input, returns a hardware type index */
typedef int (rq_flags_to_type_fn)(struct request_queue *, unsigned int);
typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *); typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *);
typedef void (put_budget_fn)(struct blk_mq_hw_ctx *); typedef void (put_budget_fn)(struct blk_mq_hw_ctx *);
typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
...@@ -154,11 +156,6 @@ struct blk_mq_ops { ...@@ -154,11 +156,6 @@ struct blk_mq_ops {
*/ */
commit_rqs_fn *commit_rqs; commit_rqs_fn *commit_rqs;
/*
* Return a queue map type for the given request/bio flags
*/
rq_flags_to_type_fn *rq_flags_to_type;
/* /*
* Reserve budget before queue request, once .queue_rq is * Reserve budget before queue request, once .queue_rq is
* run, it is driver's responsibility to release the * run, it is driver's responsibility to release the
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment