Commit 6a83e74d authored by Bart Van Assche, committed by Jens Axboe

blk-mq: Introduce blk_mq_quiesce_queue()

blk_mq_quiesce_queue() waits until ongoing .queue_rq() invocations
have finished. It does *not* wait until all outstanding requests have
completed, i.e. it does not wait for the request .end_io() callbacks
to be invoked.
The algorithm used by blk_mq_quiesce_queue() is as follows:
* Hold either an RCU read lock or an SRCU read lock around
  .queue_rq() calls. The former is used if .queue_rq() does not
  block and the latter if .queue_rq() may block.
* blk_mq_quiesce_queue() first calls blk_mq_stop_hw_queues()
  followed by synchronize_srcu() or synchronize_rcu(). These
  synchronize calls wait for any .queue_rq() invocations that
  started before blk_mq_quiesce_queue() was called.
* The blk_mq_hctx_stopped() checks that decide whether or not
  .queue_rq() will be invoked are performed with the (S)RCU read
  lock held. This is necessary to avoid races with
  blk_mq_quiesce_queue().
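
As an illustration, a driver that needs to pause dispatch before
changing device state could use the new helper roughly as follows.
This is a minimal sketch, assuming the driver restarts the stopped
queues afterwards via blk_mq_start_stopped_hw_queues(); the mydrv_*
name is hypothetical and not part of this patch:

#include <linux/blkdev.h>
#include <linux/blk-mq.h>

/* Hypothetical driver helper: pause dispatch while reconfiguring hardware. */
static void mydrv_pause_and_reconfigure(struct request_queue *q)
{
        /*
         * Stop the hardware queues and wait until in-flight .queue_rq()
         * calls have returned. Completions (.end_io()) may still arrive
         * after this returns.
         */
        blk_mq_quiesce_queue(q);

        /* ... driver-specific reconfiguration goes here ... */

        /* Restart the hardware queues that were stopped above. */
        blk_mq_start_stopped_hw_queues(q, true);
}
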
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Ming Lei <tom.leiming@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
parent 9b7dd572
@@ -5,6 +5,7 @@ menuconfig BLOCK
         bool "Enable the block layer" if EXPERT
         default y
         select SBITMAP
+        select SRCU
         help
           Provide block layer support for the kernel.
......
@@ -115,6 +115,33 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
 
+/**
+ * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished
+ * @q: request queue.
+ *
+ * Note: this function does not prevent that the struct request end_io()
+ * callback function is invoked. Additionally, it is not prevented that
+ * new queue_rq() calls occur unless the queue has been stopped first.
+ */
+void blk_mq_quiesce_queue(struct request_queue *q)
+{
+        struct blk_mq_hw_ctx *hctx;
+        unsigned int i;
+        bool rcu = false;
+
+        blk_mq_stop_hw_queues(q);
+
+        queue_for_each_hw_ctx(q, hctx, i) {
+                if (hctx->flags & BLK_MQ_F_BLOCKING)
+                        synchronize_srcu(&hctx->queue_rq_srcu);
+                else
+                        rcu = true;
+        }
+        if (rcu)
+                synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
+
 void blk_mq_wake_waiters(struct request_queue *q)
 {
         struct blk_mq_hw_ctx *hctx;
@@ -766,7 +793,7 @@ static inline unsigned int queued_to_index(unsigned int queued)
  * of IO. In particular, we'd like FIFO behaviour on handling existing
  * items on the hctx->dispatch list. Ignore that for now.
  */
-static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
+static void blk_mq_process_rq_list(struct blk_mq_hw_ctx *hctx)
 {
         struct request_queue *q = hctx->queue;
         struct request *rq;
@@ -778,9 +805,6 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
         if (unlikely(blk_mq_hctx_stopped(hctx)))
                 return;
 
-        WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
-                cpu_online(hctx->next_cpu));
-
         hctx->run++;
 
         /*
@@ -871,6 +895,24 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
         }
 }
 
+static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
+{
+        int srcu_idx;
+
+        WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
+                cpu_online(hctx->next_cpu));
+
+        if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
+                rcu_read_lock();
+                blk_mq_process_rq_list(hctx);
+                rcu_read_unlock();
+        } else {
+                srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
+                blk_mq_process_rq_list(hctx);
+                srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
+        }
+}
+
 /*
  * It'd be great if the workqueue API had a way to pass
  * in a mask and had some smarts for more clever placement.
@@ -1268,7 +1310,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
         const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
         struct blk_mq_alloc_data data;
         struct request *rq;
-        unsigned int request_count = 0;
+        unsigned int request_count = 0, srcu_idx;
         struct blk_plug *plug;
         struct request *same_queue_rq = NULL;
         blk_qc_t cookie;
@@ -1311,7 +1353,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
                 blk_mq_bio_to_request(rq, bio);
 
                 /*
-                 * We do limited pluging. If the bio can be merged, do that.
+                 * We do limited plugging. If the bio can be merged, do that.
                  * Otherwise the existing request in the plug list will be
                  * issued. So the plug list will have one request at most
                  */
@@ -1331,7 +1373,16 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
                 blk_mq_put_ctx(data.ctx);
                 if (!old_rq)
                         goto done;
-                blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
+
+                if (!(data.hctx->flags & BLK_MQ_F_BLOCKING)) {
+                        rcu_read_lock();
+                        blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
+                        rcu_read_unlock();
+                } else {
+                        srcu_idx = srcu_read_lock(&data.hctx->queue_rq_srcu);
+                        blk_mq_try_issue_directly(data.hctx, old_rq, &cookie);
+                        srcu_read_unlock(&data.hctx->queue_rq_srcu, srcu_idx);
+                }
                 goto done;
         }
@@ -1610,6 +1661,9 @@ static void blk_mq_exit_hctx(struct request_queue *q,
         if (set->ops->exit_hctx)
                 set->ops->exit_hctx(hctx, hctx_idx);
 
+        if (hctx->flags & BLK_MQ_F_BLOCKING)
+                cleanup_srcu_struct(&hctx->queue_rq_srcu);
+
         blk_mq_remove_cpuhp(hctx);
         blk_free_flush_queue(hctx->fq);
         sbitmap_free(&hctx->ctx_map);
@@ -1690,6 +1744,9 @@ static int blk_mq_init_hctx(struct request_queue *q,
                                    flush_start_tag + hctx_idx, node))
                 goto free_fq;
 
+        if (hctx->flags & BLK_MQ_F_BLOCKING)
+                init_srcu_struct(&hctx->queue_rq_srcu);
+
         return 0;
 
  free_fq:
......
@@ -3,6 +3,7 @@
 
 #include <linux/blkdev.h>
 #include <linux/sbitmap.h>
+#include <linux/srcu.h>
 
 struct blk_mq_tags;
 struct blk_flush_queue;
@@ -35,6 +36,8 @@ struct blk_mq_hw_ctx {
         struct blk_mq_tags      *tags;
 
+        struct srcu_struct      queue_rq_srcu;
+
         unsigned long           queued;
         unsigned long           run;
 #define BLK_MQ_MAX_DISPATCH_ORDER 7
......
@@ -918,6 +918,7 @@ extern void __blk_run_queue(struct request_queue *q);
 extern void __blk_run_queue_uncond(struct request_queue *q);
 extern void blk_run_queue(struct request_queue *);
 extern void blk_run_queue_async(struct request_queue *q);
+extern void blk_mq_quiesce_queue(struct request_queue *q);
 extern int blk_rq_map_user(struct request_queue *, struct request *,
                            struct rq_map_data *, void __user *, unsigned long,
                            gfp_t);
......
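
The SRCU variant above is only used for hardware queues whose driver
sets BLK_MQ_F_BLOCKING, i.e. whose .queue_rq() implementation may
sleep. The following is a rough sketch of the kind of tag-set
configuration that would exercise that path; all mydrv_* names are
hypothetical and only the blk-mq symbols come from the kernel:

#include <linux/blk-mq.h>

/* Hypothetical .queue_rq() that may sleep, e.g. on a mutex or network I/O. */
static int mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
                          const struct blk_mq_queue_data *bd)
{
        blk_mq_start_request(bd->rq);
        /* ... hand bd->rq to a backend that may block ... */
        return BLK_MQ_RQ_QUEUE_OK;
}

static struct blk_mq_ops mydrv_mq_ops = {
        .queue_rq       = mydrv_queue_rq,
};

static struct blk_mq_tag_set mydrv_tag_set = {
        .ops            = &mydrv_mq_ops,
        .nr_hw_queues   = 1,
        .queue_depth    = 64,
        .numa_node      = NUMA_NO_NODE,
        /* Tell blk-mq that .queue_rq() may block, selecting the SRCU path. */
        .flags          = BLK_MQ_F_BLOCKING,
};

Such a driver would register the tag set with blk_mq_alloc_tag_set()
and blk_mq_init_queue() as usual; queues without BLK_MQ_F_BLOCKING
keep using plain RCU around .queue_rq().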