Commit 79f720a7 authored by Jens Axboe's avatar Jens Axboe

blk-mq: only run the hardware queue if IO is pending

Currently we are inconsistent in when we decide to run the queue. Using
blk_mq_run_hw_queues() we check if the hctx has pending IO before
running it, but we don't do that from the individual queue run function,
blk_mq_run_hw_queue(). This results in a lot of extra and pointless
queue runs, potentially, on flush requests and (much worse) on tag
starvation situations. This is observable just looking at top output,
with lots of kworkers active. For the !async runs, it just adds to the
CPU overhead of blk-mq.

Move the has-pending check into the run function instead of having
callers do it.
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent f0fba398
......@@ -81,12 +81,7 @@ static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
} else
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
if (blk_mq_hctx_has_pending(hctx)) {
blk_mq_run_hw_queue(hctx, true);
return true;
}
return false;
return blk_mq_run_hw_queue(hctx, true);
}
/*
......
......@@ -61,10 +61,10 @@ static int blk_mq_poll_stats_bkt(const struct request *rq)
/*
* Check if any of the ctx's have pending work in this hardware queue
*/
bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
{
return sbitmap_any_bit_set(&hctx->ctx_map) ||
!list_empty_careful(&hctx->dispatch) ||
return !list_empty_careful(&hctx->dispatch) ||
sbitmap_any_bit_set(&hctx->ctx_map) ||
blk_mq_sched_has_work(hctx);
}
......@@ -1253,9 +1253,14 @@ void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
}
EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
{
__blk_mq_delay_run_hw_queue(hctx, async, 0);
if (blk_mq_hctx_has_pending(hctx)) {
__blk_mq_delay_run_hw_queue(hctx, async, 0);
return true;
}
return false;
}
EXPORT_SYMBOL(blk_mq_run_hw_queue);
......@@ -1265,8 +1270,7 @@ void blk_mq_run_hw_queues(struct request_queue *q, bool async)
int i;
queue_for_each_hw_ctx(q, hctx, i) {
if (!blk_mq_hctx_has_pending(hctx) ||
blk_mq_hctx_stopped(hctx))
if (blk_mq_hctx_stopped(hctx))
continue;
blk_mq_run_hw_queue(hctx, async);
......
......@@ -26,14 +26,12 @@ struct blk_mq_ctx {
struct kobject kobj;
} ____cacheline_aligned_in_smp;
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_freeze_queue(struct request_queue *q);
void blk_mq_free_queue(struct request_queue *q);
int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
void blk_mq_wake_waiters(struct request_queue *q);
bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool);
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx);
bool blk_mq_get_driver_tag(struct request *rq, struct blk_mq_hw_ctx **hctx,
bool wait);
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
......
......@@ -266,7 +266,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
void blk_mq_quiesce_queue(struct request_queue *q);
void blk_mq_unquiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment