Commit 4b855ad3 authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Thomas Gleixner

blk-mq: Create hctx for each present CPU

Currently we only create hctx for online CPUs, which can lead to a lot
of churn due to frequent soft offline / online operations.  Instead
allocate one for each present CPU to avoid this and dramatically simplify
the code.
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Reviewed-by: default avatarJens Axboe <axboe@kernel.dk>
Cc: Keith Busch <keith.busch@intel.com>
Cc: linux-block@vger.kernel.org
Cc: linux-nvme@lists.infradead.org
Link: http://lkml.kernel.org/r/20170626102058.10200-3-hch@lst.deSigned-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
parent 5f042e7c
...@@ -37,9 +37,6 @@ ...@@ -37,9 +37,6 @@
#include "blk-wbt.h" #include "blk-wbt.h"
#include "blk-mq-sched.h" #include "blk-mq-sched.h"
static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);
static void blk_mq_poll_stats_start(struct request_queue *q); static void blk_mq_poll_stats_start(struct request_queue *q);
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb); static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync); static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync);
...@@ -1975,8 +1972,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, ...@@ -1975,8 +1972,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
INIT_LIST_HEAD(&__ctx->rq_list); INIT_LIST_HEAD(&__ctx->rq_list);
__ctx->queue = q; __ctx->queue = q;
/* If the cpu isn't online, the cpu is mapped to first hctx */ /* If the cpu isn't present, the cpu is mapped to first hctx */
if (!cpu_online(i)) if (!cpu_present(i))
continue; continue;
hctx = blk_mq_map_queue(q, i); hctx = blk_mq_map_queue(q, i);
...@@ -2019,8 +2016,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set, ...@@ -2019,8 +2016,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
} }
} }
static void blk_mq_map_swqueue(struct request_queue *q, static void blk_mq_map_swqueue(struct request_queue *q)
const struct cpumask *online_mask)
{ {
unsigned int i, hctx_idx; unsigned int i, hctx_idx;
struct blk_mq_hw_ctx *hctx; struct blk_mq_hw_ctx *hctx;
...@@ -2038,13 +2034,11 @@ static void blk_mq_map_swqueue(struct request_queue *q, ...@@ -2038,13 +2034,11 @@ static void blk_mq_map_swqueue(struct request_queue *q,
} }
/* /*
* Map software to hardware queues * Map software to hardware queues.
*
* If the cpu isn't present, the cpu is mapped to first hctx.
*/ */
for_each_possible_cpu(i) { for_each_present_cpu(i) {
/* If the cpu isn't online, the cpu is mapped to first hctx */
if (!cpumask_test_cpu(i, online_mask))
continue;
hctx_idx = q->mq_map[i]; hctx_idx = q->mq_map[i];
/* unmapped hw queue can be remapped after CPU topo changed */ /* unmapped hw queue can be remapped after CPU topo changed */
if (!set->tags[hctx_idx] && if (!set->tags[hctx_idx] &&
...@@ -2330,16 +2324,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, ...@@ -2330,16 +2324,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
blk_queue_softirq_done(q, set->ops->complete); blk_queue_softirq_done(q, set->ops->complete);
blk_mq_init_cpu_queues(q, set->nr_hw_queues); blk_mq_init_cpu_queues(q, set->nr_hw_queues);
get_online_cpus();
mutex_lock(&all_q_mutex);
list_add_tail(&q->all_q_node, &all_q_list);
blk_mq_add_queue_tag_set(set, q); blk_mq_add_queue_tag_set(set, q);
blk_mq_map_swqueue(q, cpu_online_mask); blk_mq_map_swqueue(q);
mutex_unlock(&all_q_mutex);
put_online_cpus();
if (!(set->flags & BLK_MQ_F_NO_SCHED)) { if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
int ret; int ret;
...@@ -2365,18 +2351,12 @@ void blk_mq_free_queue(struct request_queue *q) ...@@ -2365,18 +2351,12 @@ void blk_mq_free_queue(struct request_queue *q)
{ {
struct blk_mq_tag_set *set = q->tag_set; struct blk_mq_tag_set *set = q->tag_set;
mutex_lock(&all_q_mutex);
list_del_init(&q->all_q_node);
mutex_unlock(&all_q_mutex);
blk_mq_del_queue_tag_set(q); blk_mq_del_queue_tag_set(q);
blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
} }
/* Basically redo blk_mq_init_queue with queue frozen */ /* Basically redo blk_mq_init_queue with queue frozen */
static void blk_mq_queue_reinit(struct request_queue *q, static void blk_mq_queue_reinit(struct request_queue *q)
const struct cpumask *online_mask)
{ {
WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth)); WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
...@@ -2389,76 +2369,12 @@ static void blk_mq_queue_reinit(struct request_queue *q, ...@@ -2389,76 +2369,12 @@ static void blk_mq_queue_reinit(struct request_queue *q,
* involves free and re-allocate memory, worthy doing?) * involves free and re-allocate memory, worthy doing?)
*/ */
blk_mq_map_swqueue(q, online_mask); blk_mq_map_swqueue(q);
blk_mq_sysfs_register(q); blk_mq_sysfs_register(q);
blk_mq_debugfs_register_hctxs(q); blk_mq_debugfs_register_hctxs(q);
} }
/*
* New online cpumask which is going to be set in this hotplug event.
* Declare this cpumasks as global as cpu-hotplug operation is invoked
* one-by-one and dynamically allocating this could result in a failure.
*/
static struct cpumask cpuhp_online_new;
static void blk_mq_queue_reinit_work(void)
{
struct request_queue *q;
mutex_lock(&all_q_mutex);
/*
* We need to freeze and reinit all existing queues. Freezing
* involves synchronous wait for an RCU grace period and doing it
* one by one may take a long time. Start freezing all queues in
* one swoop and then wait for the completions so that freezing can
* take place in parallel.
*/
list_for_each_entry(q, &all_q_list, all_q_node)
blk_freeze_queue_start(q);
list_for_each_entry(q, &all_q_list, all_q_node)
blk_mq_freeze_queue_wait(q);
list_for_each_entry(q, &all_q_list, all_q_node)
blk_mq_queue_reinit(q, &cpuhp_online_new);
list_for_each_entry(q, &all_q_list, all_q_node)
blk_mq_unfreeze_queue(q);
mutex_unlock(&all_q_mutex);
}
static int blk_mq_queue_reinit_dead(unsigned int cpu)
{
cpumask_copy(&cpuhp_online_new, cpu_online_mask);
blk_mq_queue_reinit_work();
return 0;
}
/*
* Before hotadded cpu starts handling requests, new mappings must be
* established. Otherwise, these requests in hw queue might never be
* dispatched.
*
* For example, there is a single hw queue (hctx) and two CPU queues (ctx0
* for CPU0, and ctx1 for CPU1).
*
* Now CPU1 is just onlined and a request is inserted into ctx1->rq_list
* and set bit0 in pending bitmap as ctx1->index_hw is still zero.
*
* And then while running hw queue, blk_mq_flush_busy_ctxs() finds bit0 is set
* in pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list.
* But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list is
* ignored.
*/
static int blk_mq_queue_reinit_prepare(unsigned int cpu)
{
cpumask_copy(&cpuhp_online_new, cpu_online_mask);
cpumask_set_cpu(cpu, &cpuhp_online_new);
blk_mq_queue_reinit_work();
return 0;
}
static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
{ {
int i; int i;
...@@ -2669,7 +2585,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, ...@@ -2669,7 +2585,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
blk_mq_update_queue_map(set); blk_mq_update_queue_map(set);
list_for_each_entry(q, &set->tag_list, tag_set_list) { list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_realloc_hw_ctxs(set, q); blk_mq_realloc_hw_ctxs(set, q);
blk_mq_queue_reinit(q, cpu_online_mask); blk_mq_queue_reinit(q);
} }
list_for_each_entry(q, &set->tag_list, tag_set_list) list_for_each_entry(q, &set->tag_list, tag_set_list)
...@@ -2885,24 +2801,10 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie) ...@@ -2885,24 +2801,10 @@ bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie)
} }
EXPORT_SYMBOL_GPL(blk_mq_poll); EXPORT_SYMBOL_GPL(blk_mq_poll);
void blk_mq_disable_hotplug(void)
{
mutex_lock(&all_q_mutex);
}
void blk_mq_enable_hotplug(void)
{
mutex_unlock(&all_q_mutex);
}
static int __init blk_mq_init(void) static int __init blk_mq_init(void)
{ {
cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL, cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
blk_mq_hctx_notify_dead); blk_mq_hctx_notify_dead);
cpuhp_setup_state_nocalls(CPUHP_BLK_MQ_PREPARE, "block/mq:prepare",
blk_mq_queue_reinit_prepare,
blk_mq_queue_reinit_dead);
return 0; return 0;
} }
subsys_initcall(blk_mq_init); subsys_initcall(blk_mq_init);
...@@ -56,11 +56,6 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, ...@@ -56,11 +56,6 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head); bool at_head);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list); struct list_head *list);
/*
* CPU hotplug helpers
*/
void blk_mq_enable_hotplug(void);
void blk_mq_disable_hotplug(void);
/* /*
* CPU -> queue mappings * CPU -> queue mappings
......
...@@ -58,7 +58,6 @@ enum cpuhp_state { ...@@ -58,7 +58,6 @@ enum cpuhp_state {
CPUHP_XEN_EVTCHN_PREPARE, CPUHP_XEN_EVTCHN_PREPARE,
CPUHP_ARM_SHMOBILE_SCU_PREPARE, CPUHP_ARM_SHMOBILE_SCU_PREPARE,
CPUHP_SH_SH3X_PREPARE, CPUHP_SH_SH3X_PREPARE,
CPUHP_BLK_MQ_PREPARE,
CPUHP_NET_FLOW_PREPARE, CPUHP_NET_FLOW_PREPARE,
CPUHP_TOPOLOGY_PREPARE, CPUHP_TOPOLOGY_PREPARE,
CPUHP_NET_IUCV_PREPARE, CPUHP_NET_IUCV_PREPARE,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment