Commit bd166ef1 authored by Jens Axboe's avatar Jens Axboe

blk-mq-sched: add framework for MQ capable IO schedulers

This adds a set of hooks that intercepts the blk-mq path of
allocating/inserting/issuing/completing requests, allowing
us to develop a scheduler within that framework.

We reuse the existing elevator scheduler API on the registration
side, but augment that with the scheduler flagging support for
the blk-mq interfce, and with a separate set of ops hooks for MQ
devices.

We split driver and scheduler tags, so we can run the scheduling
independently of device queue depth.
Signed-off-by: default avatarJens Axboe <axboe@fb.com>
Reviewed-by: default avatarBart Van Assche <bart.vanassche@sandisk.com>
Reviewed-by: default avatarOmar Sandoval <osandov@fb.com>
parent 2af8cbe3
...@@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ ...@@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \ blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/ badblocks.o partitions/
......
...@@ -1223,7 +1223,11 @@ int blkcg_activate_policy(struct request_queue *q, ...@@ -1223,7 +1223,11 @@ int blkcg_activate_policy(struct request_queue *q,
if (blkcg_policy_enabled(q, pol)) if (blkcg_policy_enabled(q, pol))
return 0; return 0;
blk_queue_bypass_start(q); if (q->mq_ops) {
blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
} else
blk_queue_bypass_start(q);
pd_prealloc: pd_prealloc:
if (!pd_prealloc) { if (!pd_prealloc) {
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node); pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
...@@ -1261,7 +1265,10 @@ int blkcg_activate_policy(struct request_queue *q, ...@@ -1261,7 +1265,10 @@ int blkcg_activate_policy(struct request_queue *q,
spin_unlock_irq(q->queue_lock); spin_unlock_irq(q->queue_lock);
out_bypass_end: out_bypass_end:
blk_queue_bypass_end(q); if (q->mq_ops)
blk_mq_unfreeze_queue(q);
else
blk_queue_bypass_end(q);
if (pd_prealloc) if (pd_prealloc)
pol->pd_free_fn(pd_prealloc); pol->pd_free_fn(pd_prealloc);
return ret; return ret;
...@@ -1284,7 +1291,12 @@ void blkcg_deactivate_policy(struct request_queue *q, ...@@ -1284,7 +1291,12 @@ void blkcg_deactivate_policy(struct request_queue *q,
if (!blkcg_policy_enabled(q, pol)) if (!blkcg_policy_enabled(q, pol))
return; return;
blk_queue_bypass_start(q); if (q->mq_ops) {
blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
} else
blk_queue_bypass_start(q);
spin_lock_irq(q->queue_lock); spin_lock_irq(q->queue_lock);
__clear_bit(pol->plid, q->blkcg_pols); __clear_bit(pol->plid, q->blkcg_pols);
...@@ -1304,7 +1316,11 @@ void blkcg_deactivate_policy(struct request_queue *q, ...@@ -1304,7 +1316,11 @@ void blkcg_deactivate_policy(struct request_queue *q,
} }
spin_unlock_irq(q->queue_lock); spin_unlock_irq(q->queue_lock);
blk_queue_bypass_end(q);
if (q->mq_ops)
blk_mq_unfreeze_queue(q);
else
blk_queue_bypass_end(q);
} }
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy); EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include "blk.h" #include "blk.h"
#include "blk-mq.h" #include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-wbt.h" #include "blk-wbt.h"
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
...@@ -134,6 +135,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) ...@@ -134,6 +135,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
rq->cmd = rq->__cmd; rq->cmd = rq->__cmd;
rq->cmd_len = BLK_MAX_CDB; rq->cmd_len = BLK_MAX_CDB;
rq->tag = -1; rq->tag = -1;
rq->internal_tag = -1;
rq->start_time = jiffies; rq->start_time = jiffies;
set_start_time_ns(rq); set_start_time_ns(rq);
rq->part = NULL; rq->part = NULL;
...@@ -2127,7 +2129,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) ...@@ -2127,7 +2129,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
if (q->mq_ops) { if (q->mq_ops) {
if (blk_queue_io_stat(q)) if (blk_queue_io_stat(q))
blk_account_io_start(rq, true); blk_account_io_start(rq, true);
blk_mq_insert_request(rq, false, true, false); blk_mq_sched_insert_request(rq, false, true, false);
return 0; return 0;
} }
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <linux/sched/sysctl.h> #include <linux/sched/sysctl.h>
#include "blk.h" #include "blk.h"
#include "blk-mq-sched.h"
/* /*
* for max sense size * for max sense size
...@@ -65,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, ...@@ -65,7 +66,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
* be reused after dying flag is set * be reused after dying flag is set
*/ */
if (q->mq_ops) { if (q->mq_ops) {
blk_mq_insert_request(rq, at_head, true, false); blk_mq_sched_insert_request(rq, at_head, true, false);
return; return;
} }
......
...@@ -74,6 +74,7 @@ ...@@ -74,6 +74,7 @@
#include "blk.h" #include "blk.h"
#include "blk-mq.h" #include "blk-mq.h"
#include "blk-mq-tag.h" #include "blk-mq-tag.h"
#include "blk-mq-sched.h"
/* FLUSH/FUA sequences */ /* FLUSH/FUA sequences */
enum { enum {
...@@ -391,9 +392,10 @@ static void mq_flush_data_end_io(struct request *rq, int error) ...@@ -391,9 +392,10 @@ static void mq_flush_data_end_io(struct request *rq, int error)
* the comment in flush_end_io(). * the comment in flush_end_io().
*/ */
spin_lock_irqsave(&fq->mq_flush_lock, flags); spin_lock_irqsave(&fq->mq_flush_lock, flags);
if (blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error)) blk_flush_complete_seq(rq, fq, REQ_FSEQ_DATA, error);
blk_mq_run_hw_queue(hctx, true);
spin_unlock_irqrestore(&fq->mq_flush_lock, flags); spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
blk_mq_run_hw_queue(hctx, true);
} }
/** /**
...@@ -453,9 +455,9 @@ void blk_insert_flush(struct request *rq) ...@@ -453,9 +455,9 @@ void blk_insert_flush(struct request *rq)
*/ */
if ((policy & REQ_FSEQ_DATA) && if ((policy & REQ_FSEQ_DATA) &&
!(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
if (q->mq_ops) { if (q->mq_ops)
blk_mq_insert_request(rq, false, true, false); blk_mq_sched_insert_request(rq, false, true, false);
} else else
list_add_tail(&rq->queuelist, &q->queue_head); list_add_tail(&rq->queuelist, &q->queue_head);
return; return;
} }
......
...@@ -43,7 +43,9 @@ static void ioc_exit_icq(struct io_cq *icq) ...@@ -43,7 +43,9 @@ static void ioc_exit_icq(struct io_cq *icq)
if (icq->flags & ICQ_EXITED) if (icq->flags & ICQ_EXITED)
return; return;
if (et->ops.sq.elevator_exit_icq_fn) if (et->uses_mq && et->ops.mq.exit_icq)
et->ops.mq.exit_icq(icq);
else if (!et->uses_mq && et->ops.sq.elevator_exit_icq_fn)
et->ops.sq.elevator_exit_icq_fn(icq); et->ops.sq.elevator_exit_icq_fn(icq);
icq->flags |= ICQ_EXITED; icq->flags |= ICQ_EXITED;
...@@ -383,7 +385,9 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q, ...@@ -383,7 +385,9 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) { if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
hlist_add_head(&icq->ioc_node, &ioc->icq_list); hlist_add_head(&icq->ioc_node, &ioc->icq_list);
list_add(&icq->q_node, &q->icq_list); list_add(&icq->q_node, &q->icq_list);
if (et->ops.sq.elevator_init_icq_fn) if (et->uses_mq && et->ops.mq.init_icq)
et->ops.mq.init_icq(icq);
else if (!et->uses_mq && et->ops.sq.elevator_init_icq_fn)
et->ops.sq.elevator_init_icq_fn(icq); et->ops.sq.elevator_init_icq_fn(icq);
} else { } else {
kmem_cache_free(et->icq_cache, icq); kmem_cache_free(et->icq_cache, icq);
......
...@@ -763,7 +763,7 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq, ...@@ -763,7 +763,7 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
{ {
struct elevator_queue *e = q->elevator; struct elevator_queue *e = q->elevator;
if (e->type->ops.sq.elevator_allow_rq_merge_fn) if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn)
if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next)) if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next))
return 0; return 0;
......
/*
* blk-mq scheduling framework
*
* Copyright (C) 2016 Jens Axboe
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blk-mq.h>
#include <trace/events/block.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
#include "blk-wbt.h"
void blk_mq_sched_free_hctx_data(struct request_queue *q,
void (*exit)(struct blk_mq_hw_ctx *))
{
struct blk_mq_hw_ctx *hctx;
int i;
queue_for_each_hw_ctx(q, hctx, i) {
if (exit && hctx->sched_data)
exit(hctx);
kfree(hctx->sched_data);
hctx->sched_data = NULL;
}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_free_hctx_data);
int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
int (*init)(struct blk_mq_hw_ctx *),
void (*exit)(struct blk_mq_hw_ctx *))
{
struct blk_mq_hw_ctx *hctx;
int ret;
int i;
queue_for_each_hw_ctx(q, hctx, i) {
hctx->sched_data = kmalloc_node(size, GFP_KERNEL, hctx->numa_node);
if (!hctx->sched_data) {
ret = -ENOMEM;
goto error;
}
if (init) {
ret = init(hctx);
if (ret) {
/*
* We don't want to give exit() a partially
* initialized sched_data. init() must clean up
* if it fails.
*/
kfree(hctx->sched_data);
hctx->sched_data = NULL;
goto error;
}
}
}
return 0;
error:
blk_mq_sched_free_hctx_data(q, exit);
return ret;
}
EXPORT_SYMBOL_GPL(blk_mq_sched_init_hctx_data);
static void __blk_mq_sched_assign_ioc(struct request_queue *q,
struct request *rq, struct io_context *ioc)
{
struct io_cq *icq;
spin_lock_irq(q->queue_lock);
icq = ioc_lookup_icq(ioc, q);
spin_unlock_irq(q->queue_lock);
if (!icq) {
icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
if (!icq)
return;
}
rq->elv.icq = icq;
if (!blk_mq_sched_get_rq_priv(q, rq)) {
rq->rq_flags |= RQF_ELVPRIV;
get_io_context(icq->ioc);
return;
}
rq->elv.icq = NULL;
}
static void blk_mq_sched_assign_ioc(struct request_queue *q,
struct request *rq, struct bio *bio)
{
struct io_context *ioc;
ioc = rq_ioc(bio);
if (ioc)
__blk_mq_sched_assign_ioc(q, rq, ioc);
}
struct request *blk_mq_sched_get_request(struct request_queue *q,
struct bio *bio,
unsigned int op,
struct blk_mq_alloc_data *data)
{
struct elevator_queue *e = q->elevator;
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
struct request *rq;
const bool is_flush = op & (REQ_PREFLUSH | REQ_FUA);
blk_queue_enter_live(q);
ctx = blk_mq_get_ctx(q);
hctx = blk_mq_map_queue(q, ctx->cpu);
blk_mq_set_alloc_data(data, q, 0, ctx, hctx);
if (e) {
data->flags |= BLK_MQ_REQ_INTERNAL;
/*
* Flush requests are special and go directly to the
* dispatch list.
*/
if (!is_flush && e->type->ops.mq.get_request) {
rq = e->type->ops.mq.get_request(q, op, data);
if (rq)
rq->rq_flags |= RQF_QUEUED;
} else
rq = __blk_mq_alloc_request(data, op);
} else {
rq = __blk_mq_alloc_request(data, op);
data->hctx->tags->rqs[rq->tag] = rq;
}
if (rq) {
if (!is_flush) {
rq->elv.icq = NULL;
if (e && e->type->icq_cache)
blk_mq_sched_assign_ioc(q, rq, bio);
}
data->hctx->queued++;
return rq;
}
blk_queue_exit(q);
return NULL;
}
void blk_mq_sched_put_request(struct request *rq)
{
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
if (rq->rq_flags & RQF_ELVPRIV) {
blk_mq_sched_put_rq_priv(rq->q, rq);
if (rq->elv.icq) {
put_io_context(rq->elv.icq->ioc);
rq->elv.icq = NULL;
}
}
if ((rq->rq_flags & RQF_QUEUED) && e && e->type->ops.mq.put_request)
e->type->ops.mq.put_request(rq);
else
blk_mq_finish_request(rq);
}
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
{
struct elevator_queue *e = hctx->queue->elevator;
LIST_HEAD(rq_list);
if (unlikely(blk_mq_hctx_stopped(hctx)))
return;
hctx->run++;
/*
* If we have previous entries on our dispatch list, grab them first for
* more fair dispatch.
*/
if (!list_empty_careful(&hctx->dispatch)) {
spin_lock(&hctx->lock);
if (!list_empty(&hctx->dispatch))
list_splice_init(&hctx->dispatch, &rq_list);
spin_unlock(&hctx->lock);
}
/*
* Only ask the scheduler for requests, if we didn't have residual
* requests from the dispatch list. This is to avoid the case where
* we only ever dispatch a fraction of the requests available because
* of low device queue depth. Once we pull requests out of the IO
* scheduler, we can no longer merge or sort them. So it's best to
* leave them there for as long as we can. Mark the hw queue as
* needing a restart in that case.
*/
if (list_empty(&rq_list)) {
if (e && e->type->ops.mq.dispatch_requests)
e->type->ops.mq.dispatch_requests(hctx, &rq_list);
else
blk_mq_flush_busy_ctxs(hctx, &rq_list);
} else
blk_mq_sched_mark_restart(hctx);
blk_mq_dispatch_rq_list(hctx, &rq_list);
}
void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
struct list_head *rq_list,
struct request *(*get_rq)(struct blk_mq_hw_ctx *))
{
do {
struct request *rq;
rq = get_rq(hctx);
if (!rq)
break;
list_add_tail(&rq->queuelist, rq_list);
} while (1);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_move_to_dispatch);
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio)
{
struct request *rq;
int ret;
ret = elv_merge(q, &rq, bio);
if (ret == ELEVATOR_BACK_MERGE) {
if (!blk_mq_sched_allow_merge(q, rq, bio))
return false;
if (bio_attempt_back_merge(q, rq, bio)) {
if (!attempt_back_merge(q, rq))
elv_merged_request(q, rq, ret);
return true;
}
} else if (ret == ELEVATOR_FRONT_MERGE) {
if (!blk_mq_sched_allow_merge(q, rq, bio))
return false;
if (bio_attempt_front_merge(q, rq, bio)) {
if (!attempt_front_merge(q, rq))
elv_merged_request(q, rq, ret);
return true;
}
}
return false;
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
struct elevator_queue *e = q->elevator;
if (e->type->ops.mq.bio_merge) {
struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
blk_mq_put_ctx(ctx);
return e->type->ops.mq.bio_merge(hctx, bio);
}
return false;
}
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
{
return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
void blk_mq_sched_request_inserted(struct request *rq)
{
trace_block_rq_insert(rq->q, rq);
}
EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
if (rq->tag == -1) {
rq->rq_flags |= RQF_SORTED;
return false;
}
/*
* If we already have a real request tag, send directly to
* the dispatch list.
*/
spin_lock(&hctx->lock);
list_add(&rq->queuelist, &hctx->dispatch);
spin_unlock(&hctx->lock);
return true;
}
EXPORT_SYMBOL_GPL(blk_mq_sched_bypass_insert);
static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
struct blk_mq_hw_ctx *hctx,
unsigned int hctx_idx)
{
if (hctx->sched_tags) {
blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
blk_mq_free_rq_map(hctx->sched_tags);
hctx->sched_tags = NULL;
}
}
int blk_mq_sched_setup(struct request_queue *q)
{
struct blk_mq_tag_set *set = q->tag_set;
struct blk_mq_hw_ctx *hctx;
int ret, i;
/*
* Default to 256, since we don't split into sync/async like the
* old code did. Additionally, this is a per-hw queue depth.
*/
q->nr_requests = 2 * BLKDEV_MAX_RQ;
/*
* We're switching to using an IO scheduler, so setup the hctx
* scheduler tags and switch the request map from the regular
* tags to scheduler tags. First allocate what we need, so we
* can safely fail and fallback, if needed.
*/
ret = 0;
queue_for_each_hw_ctx(q, hctx, i) {
hctx->sched_tags = blk_mq_alloc_rq_map(set, i, q->nr_requests, 0);
if (!hctx->sched_tags) {
ret = -ENOMEM;
break;
}
ret = blk_mq_alloc_rqs(set, hctx->sched_tags, i, q->nr_requests);
if (ret)
break;
}
/*
* If we failed, free what we did allocate
*/
if (ret) {
queue_for_each_hw_ctx(q, hctx, i) {
if (!hctx->sched_tags)
continue;
blk_mq_sched_free_tags(set, hctx, i);
}
return ret;
}
return 0;
}
void blk_mq_sched_teardown(struct request_queue *q)
{
struct blk_mq_tag_set *set = q->tag_set;
struct blk_mq_hw_ctx *hctx;
int i;
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_sched_free_tags(set, hctx, i);
}
#ifndef BLK_MQ_SCHED_H
#define BLK_MQ_SCHED_H
#include "blk-mq.h"
#include "blk-mq-tag.h"
int blk_mq_sched_init_hctx_data(struct request_queue *q, size_t size,
int (*init)(struct blk_mq_hw_ctx *),
void (*exit)(struct blk_mq_hw_ctx *));
void blk_mq_sched_free_hctx_data(struct request_queue *q,
void (*exit)(struct blk_mq_hw_ctx *));
struct request *blk_mq_sched_get_request(struct request_queue *q, struct bio *bio, unsigned int op, struct blk_mq_alloc_data *data);
void blk_mq_sched_put_request(struct request *rq);
void blk_mq_sched_request_inserted(struct request *rq);
bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, struct request *rq);
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio);
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio);
bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
void blk_mq_sched_move_to_dispatch(struct blk_mq_hw_ctx *hctx,
struct list_head *rq_list,
struct request *(*get_rq)(struct blk_mq_hw_ctx *));
int blk_mq_sched_setup(struct request_queue *q);
void blk_mq_sched_teardown(struct request_queue *q);
static inline bool
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
{
struct elevator_queue *e = q->elevator;
if (!e || blk_queue_nomerges(q) || !bio_mergeable(bio))
return false;
return __blk_mq_sched_bio_merge(q, bio);
}
static inline int blk_mq_sched_get_rq_priv(struct request_queue *q,
struct request *rq)
{
struct elevator_queue *e = q->elevator;
if (e && e->type->ops.mq.get_rq_priv)
return e->type->ops.mq.get_rq_priv(q, rq);
return 0;
}
static inline void blk_mq_sched_put_rq_priv(struct request_queue *q,
struct request *rq)
{
struct elevator_queue *e = q->elevator;
if (e && e->type->ops.mq.put_rq_priv)
e->type->ops.mq.put_rq_priv(q, rq);
}
static inline void
blk_mq_sched_insert_request(struct request *rq, bool at_head, bool run_queue,
bool async)
{
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
struct blk_mq_ctx *ctx = rq->mq_ctx;
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
if (e && e->type->ops.mq.insert_requests) {
LIST_HEAD(list);
list_add(&rq->queuelist, &list);
e->type->ops.mq.insert_requests(hctx, &list, at_head);
} else {
spin_lock(&ctx->lock);
__blk_mq_insert_request(hctx, rq, at_head);
spin_unlock(&ctx->lock);
}
if (run_queue)
blk_mq_run_hw_queue(hctx, async);
}
static inline void
blk_mq_sched_insert_requests(struct request_queue *q, struct blk_mq_ctx *ctx,
struct list_head *list, bool run_queue_async)
{
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
struct elevator_queue *e = hctx->queue->elevator;
if (e && e->type->ops.mq.insert_requests)
e->type->ops.mq.insert_requests(hctx, list, false);
else
blk_mq_insert_requests(hctx, ctx, list);
blk_mq_run_hw_queue(hctx, run_queue_async);
}
static inline bool
blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
struct bio *bio)
{
struct elevator_queue *e = q->elevator;
if (e && e->type->ops.mq.allow_merge)
return e->type->ops.mq.allow_merge(q, rq, bio);
return true;
}
static inline void
blk_mq_sched_completed_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
struct elevator_queue *e = hctx->queue->elevator;
if (e && e->type->ops.mq.completed_request)
e->type->ops.mq.completed_request(hctx, rq);
BUG_ON(rq->internal_tag == -1);
blk_mq_put_tag(hctx, hctx->sched_tags, rq->mq_ctx, rq->internal_tag);
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
blk_mq_run_hw_queue(hctx, true);
}
}
static inline void blk_mq_sched_started_request(struct request *rq)
{
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
if (e && e->type->ops.mq.started_request)
e->type->ops.mq.started_request(rq);
}
static inline void blk_mq_sched_requeue_request(struct request *rq)
{
struct request_queue *q = rq->q;
struct elevator_queue *e = q->elevator;
if (e && e->type->ops.mq.requeue_request)
e->type->ops.mq.requeue_request(rq);
}
static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
{
struct elevator_queue *e = hctx->queue->elevator;
if (e && e->type->ops.mq.has_work)
return e->type->ops.mq.has_work(hctx);
return false;
}
static inline void blk_mq_sched_mark_restart(struct blk_mq_hw_ctx *hctx)
{
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
{
return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
#endif
...@@ -231,6 +231,14 @@ static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx, ...@@ -231,6 +231,14 @@ static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx,
return ret; return ret;
} }
static ssize_t blk_mq_hw_sysfs_sched_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
{
if (hctx->sched_tags)
return blk_mq_tag_sysfs_show(hctx->sched_tags, page);
return 0;
}
static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page) static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
{ {
return blk_mq_tag_sysfs_show(hctx->tags, page); return blk_mq_tag_sysfs_show(hctx->tags, page);
...@@ -345,6 +353,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = { ...@@ -345,6 +353,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
.attr = {.name = "pending", .mode = S_IRUGO }, .attr = {.name = "pending", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_rq_list_show, .show = blk_mq_hw_sysfs_rq_list_show,
}; };
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_sched_tags = {
.attr = {.name = "sched_tags", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_sched_tags_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = { static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = {
.attr = {.name = "tags", .mode = S_IRUGO }, .attr = {.name = "tags", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_tags_show, .show = blk_mq_hw_sysfs_tags_show,
...@@ -370,6 +382,7 @@ static struct attribute *default_hw_ctx_attrs[] = { ...@@ -370,6 +382,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_dispatched.attr, &blk_mq_hw_sysfs_dispatched.attr,
&blk_mq_hw_sysfs_pending.attr, &blk_mq_hw_sysfs_pending.attr,
&blk_mq_hw_sysfs_tags.attr, &blk_mq_hw_sysfs_tags.attr,
&blk_mq_hw_sysfs_sched_tags.attr,
&blk_mq_hw_sysfs_cpus.attr, &blk_mq_hw_sysfs_cpus.attr,
&blk_mq_hw_sysfs_active.attr, &blk_mq_hw_sysfs_active.attr,
&blk_mq_hw_sysfs_poll.attr, &blk_mq_hw_sysfs_poll.attr,
......
This diff is collapsed.
...@@ -52,6 +52,8 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags, ...@@ -52,6 +52,8 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
*/ */
void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
bool at_head); bool at_head);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list);
/* /*
* CPU hotplug helpers * CPU hotplug helpers
*/ */
...@@ -124,6 +126,9 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data, ...@@ -124,6 +126,9 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data)
{ {
if (data->flags & BLK_MQ_REQ_INTERNAL)
return data->hctx->sched_tags;
return data->hctx->tags; return data->hctx->tags;
} }
...@@ -132,8 +137,9 @@ static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data ...@@ -132,8 +137,9 @@ static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data
*/ */
void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
struct request *rq, unsigned int op); struct request *rq, unsigned int op);
void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, void __blk_mq_finish_request(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct request *rq); struct request *rq);
void blk_mq_finish_request(struct request *rq);
struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data, struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
unsigned int op); unsigned int op);
......
...@@ -272,6 +272,7 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq) ...@@ -272,6 +272,7 @@ void blk_queue_end_tag(struct request_queue *q, struct request *rq)
list_del_init(&rq->queuelist); list_del_init(&rq->queuelist);
rq->rq_flags &= ~RQF_QUEUED; rq->rq_flags &= ~RQF_QUEUED;
rq->tag = -1; rq->tag = -1;
rq->internal_tag = -1;
if (unlikely(bqt->tag_index[tag] == NULL)) if (unlikely(bqt->tag_index[tag] == NULL))
printk(KERN_ERR "%s: tag %d is missing\n", printk(KERN_ERR "%s: tag %d is missing\n",
......
This diff is collapsed.
...@@ -22,6 +22,7 @@ struct blk_mq_hw_ctx { ...@@ -22,6 +22,7 @@ struct blk_mq_hw_ctx {
unsigned long flags; /* BLK_MQ_F_* flags */ unsigned long flags; /* BLK_MQ_F_* flags */
void *sched_data;
struct request_queue *queue; struct request_queue *queue;
struct blk_flush_queue *fq; struct blk_flush_queue *fq;
...@@ -35,6 +36,7 @@ struct blk_mq_hw_ctx { ...@@ -35,6 +36,7 @@ struct blk_mq_hw_ctx {
atomic_t wait_index; atomic_t wait_index;
struct blk_mq_tags *tags; struct blk_mq_tags *tags;
struct blk_mq_tags *sched_tags;
struct srcu_struct queue_rq_srcu; struct srcu_struct queue_rq_srcu;
...@@ -156,6 +158,7 @@ enum { ...@@ -156,6 +158,7 @@ enum {
BLK_MQ_S_STOPPED = 0, BLK_MQ_S_STOPPED = 0,
BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_TAG_ACTIVE = 1,
BLK_MQ_S_SCHED_RESTART = 2,
BLK_MQ_MAX_DEPTH = 10240, BLK_MQ_MAX_DEPTH = 10240,
...@@ -179,13 +182,13 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set); ...@@ -179,13 +182,13 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
void blk_mq_insert_request(struct request *, bool, bool, bool);
void blk_mq_free_request(struct request *rq); void blk_mq_free_request(struct request *rq);
bool blk_mq_can_queue(struct blk_mq_hw_ctx *); bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
enum { enum {
BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */ BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */
BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */ BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */
BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */
}; };
struct request *blk_mq_alloc_request(struct request_queue *q, int rw, struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
......
...@@ -154,6 +154,7 @@ struct request { ...@@ -154,6 +154,7 @@ struct request {
/* the following two fields are internal, NEVER access directly */ /* the following two fields are internal, NEVER access directly */
unsigned int __data_len; /* total data len */ unsigned int __data_len; /* total data len */
int tag;
sector_t __sector; /* sector cursor */ sector_t __sector; /* sector cursor */
struct bio *bio; struct bio *bio;
...@@ -220,9 +221,10 @@ struct request { ...@@ -220,9 +221,10 @@ struct request {
unsigned short ioprio; unsigned short ioprio;
int internal_tag;
void *special; /* opaque pointer available for LLD use */ void *special; /* opaque pointer available for LLD use */
int tag;
int errors; int errors;
/* /*
......
...@@ -77,6 +77,34 @@ struct elevator_ops ...@@ -77,6 +77,34 @@ struct elevator_ops
elevator_registered_fn *elevator_registered_fn; elevator_registered_fn *elevator_registered_fn;
}; };
struct blk_mq_alloc_data;
struct blk_mq_hw_ctx;
struct elevator_mq_ops {
int (*init_sched)(struct request_queue *, struct elevator_type *);
void (*exit_sched)(struct elevator_queue *);
bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *);
int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
void (*request_merged)(struct request_queue *, struct request *, int);
void (*requests_merged)(struct request_queue *, struct request *, struct request *);
struct request *(*get_request)(struct request_queue *, unsigned int, struct blk_mq_alloc_data *);
void (*put_request)(struct request *);
void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
void (*dispatch_requests)(struct blk_mq_hw_ctx *, struct list_head *);
bool (*has_work)(struct blk_mq_hw_ctx *);
void (*completed_request)(struct blk_mq_hw_ctx *, struct request *);
void (*started_request)(struct request *);
void (*requeue_request)(struct request *);
struct request *(*former_request)(struct request_queue *, struct request *);
struct request *(*next_request)(struct request_queue *, struct request *);
int (*get_rq_priv)(struct request_queue *, struct request *);
void (*put_rq_priv)(struct request_queue *, struct request *);
void (*init_icq)(struct io_cq *);
void (*exit_icq)(struct io_cq *);
};
#define ELV_NAME_MAX (16) #define ELV_NAME_MAX (16)
struct elv_fs_entry { struct elv_fs_entry {
...@@ -96,12 +124,14 @@ struct elevator_type ...@@ -96,12 +124,14 @@ struct elevator_type
/* fields provided by elevator implementation */ /* fields provided by elevator implementation */
union { union {
struct elevator_ops sq; struct elevator_ops sq;
struct elevator_mq_ops mq;
} ops; } ops;
size_t icq_size; /* see iocontext.h */ size_t icq_size; /* see iocontext.h */
size_t icq_align; /* ditto */ size_t icq_align; /* ditto */
struct elv_fs_entry *elevator_attrs; struct elv_fs_entry *elevator_attrs;
char elevator_name[ELV_NAME_MAX]; char elevator_name[ELV_NAME_MAX];
struct module *elevator_owner; struct module *elevator_owner;
bool uses_mq;
/* managed by elevator core */ /* managed by elevator core */
char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */ char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */
...@@ -125,6 +155,7 @@ struct elevator_queue ...@@ -125,6 +155,7 @@ struct elevator_queue
struct kobject kobj; struct kobject kobj;
struct mutex sysfs_lock; struct mutex sysfs_lock;
unsigned int registered:1; unsigned int registered:1;
unsigned int uses_mq:1;
DECLARE_HASHTABLE(hash, ELV_HASH_BITS); DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
}; };
...@@ -141,6 +172,7 @@ extern void elv_merge_requests(struct request_queue *, struct request *, ...@@ -141,6 +172,7 @@ extern void elv_merge_requests(struct request_queue *, struct request *,
extern void elv_merged_request(struct request_queue *, struct request *, int); extern void elv_merged_request(struct request_queue *, struct request *, int);
extern void elv_bio_merged(struct request_queue *q, struct request *, extern void elv_bio_merged(struct request_queue *q, struct request *,
struct bio *); struct bio *);
extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
extern void elv_requeue_request(struct request_queue *, struct request *); extern void elv_requeue_request(struct request_queue *, struct request *);
extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_former_request(struct request_queue *, struct request *);
extern struct request *elv_latter_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment