Commit d3c81080 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-5.17/block-2022-01-11' of git://git.kernel.dk/linux-block

Pull block updates from Jens Axboe:

 - Unify where the struct request handling code is located in the blk-mq
   code (Christoph)

 - Header cleanups (Christoph)

 - Clean up the io_context handling code (Christoph, me)

 - Get rid of ->rq_disk in struct request (Christoph)

 - Error handling fix for add_disk() (Christoph)

 - request allocation cleanusp (Christoph)

 - Documentation updates (Eric, Matthew)

 - Remove trivial crypto unregister helper (Eric)

 - Reduce shared tag overhead (John)

 - Reduce poll_stats memory overhead (me)

 - Known indirect function call for dio (me)

 - Use atomic references for struct request (me)

 - Support request list issue for block and NVMe (me)

 - Improve queue dispatch pinning (Ming)

 - Improve the direct list issue code (Keith)

 - BFQ improvements (Jan)

 - Direct completion helper and use it in mmc block (Sebastian)

 - Use raw spinlock for the blktrace code (Wander)

 - fsync error handling fix (Ye)

 - Various fixes and cleanups (Lukas, Randy, Yang, Tetsuo, Ming, me)

* tag 'for-5.17/block-2022-01-11' of git://git.kernel.dk/linux-block: (132 commits)
  MAINTAINERS: add entries for block layer documentation
  docs: block: remove queue-sysfs.rst
  docs: sysfs-block: document virt_boundary_mask
  docs: sysfs-block: document stable_writes
  docs: sysfs-block: fill in missing documentation from queue-sysfs.rst
  docs: sysfs-block: add contact for nomerges
  docs: sysfs-block: sort alphabetically
  docs: sysfs-block: move to stable directory
  block: don't protect submit_bio_checks by q_usage_counter
  block: fix old-style declaration
  nvme-pci: fix queue_rqs list splitting
  block: introduce rq_list_move
  block: introduce rq_list_for_each_safe macro
  block: move rq_list macros to blk-mq.h
  block: drop needless assignment in set_task_ioprio()
  block: remove unnecessary trailing '\'
  bio.h: fix kernel-doc warnings
  block: check minor range in device_add_disk()
  block: use "unsigned long" for blk_validate_block_size().
  block: fix error unwinding in device_add_disk
  ...
parents 42a7b4ed f029cedb
......@@ -20,7 +20,6 @@ Block
kyber-iosched
null_blk
pr
queue-sysfs
request
stat
switching-sched
......
This diff is collapsed.
......@@ -294,9 +294,6 @@ Block Devices
.. kernel-doc:: block/blk-settings.c
:export:
.. kernel-doc:: block/blk-exec.c
:export:
.. kernel-doc:: block/blk-flush.c
:export:
......
......@@ -292,8 +292,6 @@ block/blk-sysfs.c
block/blk-settings.c
block/blk-exec.c
block/blk-flush.c
block/blk-lib.c
......
......@@ -3416,6 +3416,8 @@ M: Jens Axboe <axboe@kernel.dk>
L: linux-block@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
F: Documentation/ABI/stable/sysfs-block
F: Documentation/block/
F: block/
F: drivers/block/
F: include/linux/blk*
......
......@@ -35,6 +35,9 @@ config BLK_CGROUP_RWSTAT
config BLK_DEV_BSG_COMMON
tristate
config BLK_ICQ
bool
config BLK_DEV_BSGLIB
bool "Block layer SG support v4 helper lib"
select BLK_DEV_BSG_COMMON
......
......@@ -18,6 +18,7 @@ config MQ_IOSCHED_KYBER
config IOSCHED_BFQ
tristate "BFQ I/O scheduler"
select BLK_ICQ
help
BFQ I/O scheduler for BLK-MQ. BFQ distributes the bandwidth of
of the device among all processes according to their weights,
......
......@@ -5,7 +5,7 @@
obj-y := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-timeout.o \
blk-merge.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \
......
......@@ -665,7 +665,7 @@ static void blkdev_flush_mapping(struct block_device *bdev)
static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
{
struct gendisk *disk = bdev->bd_disk;
int ret = 0;
int ret;
if (disk->fops->open) {
ret = disk->fops->open(bdev, mode);
......@@ -750,14 +750,6 @@ struct block_device *blkdev_get_no_open(dev_t dev)
if (!kobject_get_unless_zero(&bdev->bd_device.kobj))
bdev = NULL;
iput(inode);
if (!bdev)
return NULL;
if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN)) {
put_device(&bdev->bd_device);
return NULL;
}
return bdev;
}
......@@ -837,7 +829,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
* used in blkdev_get/put().
*/
if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
(disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
(disk->event_flags & DISK_EVENT_FLAG_BLOCK_ON_EXCL_WRITE)) {
bdev->bd_write_holder = true;
unblock_events = false;
}
......@@ -963,15 +955,15 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
EXPORT_SYMBOL(blkdev_put);
/**
* lookup_bdev - lookup a struct block_device by name
* @pathname: special file representing the block device
* @dev: return value of the block device's dev_t
* lookup_bdev() - Look up a struct block_device by name.
* @pathname: Name of the block device in the filesystem.
* @dev: Pointer to the block device's dev_t, if found.
*
* Lookup the block device's dev_t at @pathname in the current
* namespace if possible and return it by @dev.
* namespace if possible and return it in @dev.
*
* RETURNS:
* 0 if succeeded, errno otherwise.
* Context: May sleep.
* Return: 0 if succeeded, negative errno otherwise.
*/
int lookup_bdev(const char *pathname, dev_t *dev)
{
......
This diff is collapsed.
......@@ -25,7 +25,7 @@
#define BFQ_DEFAULT_GRP_IOPRIO 0
#define BFQ_DEFAULT_GRP_CLASS IOPRIO_CLASS_BE
#define MAX_PID_STR_LENGTH 12
#define MAX_BFQQ_NAME_LENGTH 16
/*
* Soft real-time applications are extremely more latency sensitive
......@@ -170,6 +170,9 @@ struct bfq_entity {
/* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */
int budget;
/* Number of requests allocated in the subtree of this entity */
int allocated;
/* device weight, if non-zero, it overrides the default weight of
* bfq_group_data */
int dev_weight;
......@@ -266,8 +269,6 @@ struct bfq_queue {
struct request *next_rq;
/* number of sync and async requests queued */
int queued[2];
/* number of requests currently allocated */
int allocated;
/* number of pending metadata requests */
int meta_pending;
/* fifo list of requests in sort_list */
......@@ -387,6 +388,8 @@ struct bfq_queue {
struct bfq_queue *tentative_waker_bfqq;
/* number of times the same tentative waker has been detected */
unsigned int num_waker_detections;
/* time when we started considering this waker */
u64 waker_detection_started;
/* node for woken_list, see below */
struct hlist_node woken_list_node;
......@@ -768,6 +771,7 @@ struct bfq_data {
* function)
*/
unsigned int word_depths[2][2];
unsigned int full_depth_shift;
};
enum bfqq_state_flags {
......@@ -1079,26 +1083,27 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq);
/* --------------- end of interface of B-WF2Q+ ---------------- */
/* Logging facilities. */
static inline void bfq_pid_to_str(int pid, char *str, int len)
static inline void bfq_bfqq_name(struct bfq_queue *bfqq, char *str, int len)
{
if (pid != -1)
snprintf(str, len, "%d", pid);
char type = bfq_bfqq_sync(bfqq) ? 'S' : 'A';
if (bfqq->pid != -1)
snprintf(str, len, "bfq%d%c", bfqq->pid, type);
else
snprintf(str, len, "SHARED-");
snprintf(str, len, "bfqSHARED-%c", type);
}
#ifdef CONFIG_BFQ_GROUP_IOSCHED
struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
char pid_str[MAX_PID_STR_LENGTH]; \
char pid_str[MAX_BFQQ_NAME_LENGTH]; \
if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \
break; \
bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \
bfq_bfqq_name((bfqq), pid_str, MAX_BFQQ_NAME_LENGTH); \
blk_add_cgroup_trace_msg((bfqd)->queue, \
bfqg_to_blkg(bfqq_group(bfqq))->blkcg, \
"bfq%s%c " fmt, pid_str, \
bfq_bfqq_sync((bfqq)) ? 'S' : 'A', ##args); \
"%s " fmt, pid_str, ##args); \
} while (0)
#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do { \
......@@ -1109,13 +1114,11 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq);
#else /* CONFIG_BFQ_GROUP_IOSCHED */
#define bfq_log_bfqq(bfqd, bfqq, fmt, args...) do { \
char pid_str[MAX_PID_STR_LENGTH]; \
char pid_str[MAX_BFQQ_NAME_LENGTH]; \
if (likely(!blk_trace_note_message_enabled((bfqd)->queue))) \
break; \
bfq_pid_to_str((bfqq)->pid, pid_str, MAX_PID_STR_LENGTH); \
blk_add_trace_msg((bfqd)->queue, "bfq%s%c " fmt, pid_str, \
bfq_bfqq_sync((bfqq)) ? 'S' : 'A', \
##args); \
bfq_bfqq_name((bfqq), pid_str, MAX_BFQQ_NAME_LENGTH); \
blk_add_trace_msg((bfqd)->queue, "%s " fmt, pid_str, ##args); \
} while (0)
#define bfq_log_bfqg(bfqd, bfqg, fmt, args...) do {} while (0)
......
......@@ -26,7 +26,7 @@
#include "blk-rq-qos.h"
struct bio_alloc_cache {
struct bio_list free_list;
struct bio *free_list;
unsigned int nr;
};
......@@ -630,7 +630,8 @@ static void bio_alloc_cache_prune(struct bio_alloc_cache *cache,
unsigned int i = 0;
struct bio *bio;
while ((bio = bio_list_pop(&cache->free_list)) != NULL) {
while ((bio = cache->free_list) != NULL) {
cache->free_list = bio->bi_next;
cache->nr--;
bio_free(bio);
if (++i == nr)
......@@ -689,7 +690,8 @@ void bio_put(struct bio *bio)
bio_uninit(bio);
cache = per_cpu_ptr(bio->bi_pool->cache, get_cpu());
bio_list_add_head(&cache->free_list, bio);
bio->bi_next = cache->free_list;
cache->free_list = bio;
if (++cache->nr > ALLOC_CACHE_MAX + ALLOC_CACHE_SLACK)
bio_alloc_cache_prune(cache, ALLOC_CACHE_SLACK);
put_cpu();
......@@ -1704,8 +1706,9 @@ struct bio *bio_alloc_kiocb(struct kiocb *kiocb, unsigned short nr_vecs,
return bio_alloc_bioset(GFP_KERNEL, nr_vecs, bs);
cache = per_cpu_ptr(bs->cache, get_cpu());
bio = bio_list_pop(&cache->free_list);
if (bio) {
if (cache->free_list) {
bio = cache->free_list;
cache->free_list = bio->bi_next;
cache->nr--;
put_cpu();
bio_init(bio, nr_vecs ? bio->bi_inline_vecs : NULL, nr_vecs);
......
......@@ -30,6 +30,7 @@
#include <linux/blk-cgroup.h>
#include <linux/tracehook.h>
#include <linux/psi.h>
#include <linux/part_stat.h>
#include "blk.h"
#include "blk-ioprio.h"
#include "blk-throttle.h"
......
This diff is collapsed.
......@@ -463,11 +463,6 @@ bool blk_crypto_register(struct blk_crypto_profile *profile,
}
EXPORT_SYMBOL_GPL(blk_crypto_register);
void blk_crypto_unregister(struct request_queue *q)
{
q->crypto_profile = NULL;
}
/**
* blk_crypto_intersect_capabilities() - restrict supported crypto capabilities
* by child device
......
// SPDX-License-Identifier: GPL-2.0
/*
* Functions related to setting various queue properties from drivers
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/sched/sysctl.h>
#include "blk.h"
#include "blk-mq-sched.h"
/**
* blk_end_sync_rq - executes a completion event on a request
* @rq: request to complete
* @error: end I/O status of the request
*/
static void blk_end_sync_rq(struct request *rq, blk_status_t error)
{
struct completion *waiting = rq->end_io_data;
rq->end_io_data = (void *)(uintptr_t)error;
/*
* complete last, if this is a stack request the process (and thus
* the rq pointer) could be invalid right after this complete()
*/
complete(waiting);
}
/**
* blk_execute_rq_nowait - insert a request to I/O scheduler for execution
* @bd_disk: matching gendisk
* @rq: request to insert
* @at_head: insert request at head or tail of queue
* @done: I/O completion handler
*
* Description:
* Insert a fully prepared request at the back of the I/O scheduler queue
* for execution. Don't wait for completion.
*
* Note:
* This function will invoke @done directly if the queue is dead.
*/
void blk_execute_rq_nowait(struct gendisk *bd_disk, struct request *rq,
int at_head, rq_end_io_fn *done)
{
WARN_ON(irqs_disabled());
WARN_ON(!blk_rq_is_passthrough(rq));
rq->rq_disk = bd_disk;
rq->end_io = done;
blk_account_io_start(rq);
/*
* don't check dying flag for MQ because the request won't
* be reused after dying flag is set
*/
blk_mq_sched_insert_request(rq, at_head, true, false);
}
EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
static bool blk_rq_is_poll(struct request *rq)
{
if (!rq->mq_hctx)
return false;
if (rq->mq_hctx->type != HCTX_TYPE_POLL)
return false;
if (WARN_ON_ONCE(!rq->bio))
return false;
return true;
}
static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
{
do {
bio_poll(rq->bio, NULL, 0);
cond_resched();
} while (!completion_done(wait));
}
/**
* blk_execute_rq - insert a request into queue for execution
* @bd_disk: matching gendisk
* @rq: request to insert
* @at_head: insert request at head or tail of queue
*
* Description:
* Insert a fully prepared request at the back of the I/O scheduler queue
* for execution and wait for completion.
* Return: The blk_status_t result provided to blk_mq_end_request().
*/
blk_status_t blk_execute_rq(struct gendisk *bd_disk, struct request *rq, int at_head)
{
DECLARE_COMPLETION_ONSTACK(wait);
unsigned long hang_check;
rq->end_io_data = &wait;
blk_execute_rq_nowait(bd_disk, rq, at_head, blk_end_sync_rq);
/* Prevent hang_check timer from firing at us during very long I/O */
hang_check = sysctl_hung_task_timeout_secs;
if (blk_rq_is_poll(rq))
blk_rq_poll_completion(rq, &wait);
else if (hang_check)
while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2)));
else
wait_for_completion_io(&wait);
return (blk_status_t)(uintptr_t)rq->end_io_data;
}
EXPORT_SYMBOL(blk_execute_rq);
......@@ -69,6 +69,7 @@
#include <linux/blkdev.h>
#include <linux/gfp.h>
#include <linux/blk-mq.h>
#include <linux/part_stat.h>
#include "blk.h"
#include "blk-mq.h"
......@@ -95,6 +96,12 @@ enum {
static void blk_kick_flush(struct request_queue *q,
struct blk_flush_queue *fq, unsigned int flags);
static inline struct blk_flush_queue *
blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
{
return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx)->fq;
}
static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq)
{
unsigned int policy = 0;
......@@ -138,7 +145,7 @@ static void blk_flush_queue_rq(struct request *rq, bool add_front)
static void blk_account_io_flush(struct request *rq)
{
struct block_device *part = rq->rq_disk->part0;
struct block_device *part = rq->q->disk->part0;
part_stat_lock();
part_stat_inc(part, ios[STAT_FLUSH]);
......@@ -222,7 +229,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
/* release the tag's ownership to the req cloned from */
spin_lock_irqsave(&fq->mq_flush_lock, flags);
if (!refcount_dec_and_test(&flush_rq->ref)) {
if (!req_ref_put_and_test(flush_rq)) {
fq->rq_status = error;
spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
return;
......@@ -235,8 +242,10 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
* avoiding use-after-free.
*/
WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE);
if (fq->rq_status != BLK_STS_OK)
if (fq->rq_status != BLK_STS_OK) {
error = fq->rq_status;
fq->rq_status = BLK_STS_OK;
}
if (!q->elevator) {
flush_rq->tag = BLK_MQ_NO_TAG;
......@@ -332,7 +341,6 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH;
flush_rq->cmd_flags |= (flags & REQ_DRV) | (flags & REQ_FAILFAST_MASK);
flush_rq->rq_flags |= RQF_FLUSH_SEQ;
flush_rq->rq_disk = first_rq->rq_disk;
flush_rq->end_io = flush_end_io;
/*
* Order WRITE ->end_io and WRITE rq->ref, and its pair is the one
......@@ -341,7 +349,7 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
* and READ flush_rq->end_io
*/
smp_wmb();
refcount_set(&flush_rq->ref, 1);
req_ref_set(flush_rq, 1);
blk_flush_queue_rq(flush_rq, false);
}
......
......@@ -411,7 +411,7 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
if (disk->queue->crypto_profile) {
pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n");
blk_crypto_unregister(disk->queue);
disk->queue->crypto_profile = NULL;
}
#endif
}
......
This diff is collapsed.
......@@ -62,6 +62,7 @@ struct ioprio_blkg {
struct ioprio_blkcg {
struct blkcg_policy_data cpd;
enum prio_policy prio_policy;
bool prio_set;
};
static inline struct ioprio_blkg *pd_to_ioprio(struct blkg_policy_data *pd)
......@@ -112,7 +113,7 @@ static ssize_t ioprio_set_prio_policy(struct kernfs_open_file *of, char *buf,
if (ret < 0)
return ret;
blkcg->prio_policy = ret;
blkcg->prio_set = true;
return nbytes;
}
......@@ -190,6 +191,10 @@ static void blkcg_ioprio_track(struct rq_qos *rqos, struct request *rq,
struct bio *bio)
{
struct ioprio_blkcg *blkcg = ioprio_blkcg_from_bio(bio);
u16 prio;
if (!blkcg->prio_set)
return;
/*
* Except for IOPRIO_CLASS_NONE, higher I/O priority numbers
......@@ -199,8 +204,10 @@ static void blkcg_ioprio_track(struct rq_qos *rqos, struct request *rq,
* bio I/O priority is not modified. If the bio I/O priority equals
* IOPRIO_CLASS_NONE, the cgroup I/O priority is assigned to the bio.
*/
bio->bi_ioprio = max_t(u16, bio->bi_ioprio,
IOPRIO_PRIO_VALUE(blkcg->prio_policy, 0));
prio = max_t(u16, bio->bi_ioprio,
IOPRIO_PRIO_VALUE(blkcg->prio_policy, 0));
if (prio > bio->bi_ioprio)
bio->bi_ioprio = prio;
}
static void blkcg_ioprio_exit(struct rq_qos *rqos)
......
......@@ -8,10 +8,12 @@
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/scatterlist.h>
#include <linux/part_stat.h>
#include <trace/events/block.h>
#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
#include "blk-throttle.h"
......@@ -775,8 +777,7 @@ static struct request *attempt_merge(struct request_queue *q,
if (req_op(req) != req_op(next))
return NULL;
if (rq_data_dir(req) != rq_data_dir(next)
|| req->rq_disk != next->rq_disk)
if (rq_data_dir(req) != rq_data_dir(next))
return NULL;
if (req_op(req) == REQ_OP_WRITE_SAME &&
......@@ -903,10 +904,6 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (bio_data_dir(bio) != rq_data_dir(rq))
return false;
/* must be same device */
if (rq->rq_disk != bio->bi_bdev->bd_disk)
return false;
/* only merge integrity protected bio into ditto rq */
if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
return false;
......@@ -1067,7 +1064,6 @@ static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
* @q: request_queue new bio is being queued at
* @bio: new bio being queued
* @nr_segs: number of segments in @bio
* @same_queue_rq: output value, will be true if there's an existing request
* from the passed in @q already in the plug list
*
* Determine whether @bio being queued on @q can be merged with the previous
......@@ -1084,7 +1080,7 @@ static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
* Caller must ensure !blk_queue_nomerges(q) beforehand.
*/
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs, bool *same_queue_rq)
unsigned int nr_segs)
{
struct blk_plug *plug;
struct request *rq;
......@@ -1096,12 +1092,6 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
/* check the previously added entry for a quick merge attempt */
rq = rq_list_peek(&plug->mq_list);
if (rq->q == q) {
/*
* Only blk-mq multiple hardware queues case checks the rq in
* the same queue, there should be only one such rq in a queue
*/
*same_queue_rq = true;
if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
BIO_MERGE_OK)
return true;
......
......@@ -11,6 +11,7 @@
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
#include "blk-rq-qos.h"
......@@ -29,6 +30,9 @@ static int queue_poll_stat_show(void *data, struct seq_file *m)
struct request_queue *q = data;
int bucket;
if (!q->poll_stat)
return 0;
for (bucket = 0; bucket < (BLK_MQ_POLL_STATS_BKTS / 2); bucket++) {
seq_printf(m, "read (%d Bytes): ", 1 << (9 + bucket));
print_stat(m, &q->poll_stat[2 * bucket]);
......@@ -122,7 +126,6 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(FUA),
QUEUE_FLAG_NAME(DAX),
QUEUE_FLAG_NAME(STATS),
QUEUE_FLAG_NAME(POLL_STATS),
QUEUE_FLAG_NAME(REGISTERED),
QUEUE_FLAG_NAME(QUIESCED),
QUEUE_FLAG_NAME(PCI_P2PDMA),
......
......@@ -18,32 +18,6 @@
#include "blk-mq-tag.h"
#include "blk-wbt.h"
void blk_mq_sched_assign_ioc(struct request *rq)
{
struct request_queue *q = rq->q;
struct io_context *ioc;
struct io_cq *icq;
/*
* May not have an IO context if it's a passthrough request
*/
ioc = current->io_context;
if (!ioc)
return;
spin_lock_irq(&q->queue_lock);
icq = ioc_lookup_icq(ioc, q);
spin_unlock_irq(&q->queue_lock);
if (!icq) {
icq = ioc_create_icq(ioc, q, GFP_ATOMIC);
if (!icq)
return;
}
get_io_context(icq->ioc);
rq->elv.icq = icq;
}
/*
* Mark a hardware queue as needing a restart. For shared queues, maintain
* a count of how many hardware queues are marked for restart.
......@@ -501,7 +475,8 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
* us one extra enqueue & dequeue to sw queue.
*/
if (!hctx->dispatch_busy && !run_queue_async) {
blk_mq_try_issue_list_directly(hctx, list);
blk_mq_run_dispatch_ops(hctx->queue,
blk_mq_try_issue_list_directly(hctx, list));
if (list_empty(list))
goto out;
}
......
......@@ -8,8 +8,6 @@
#define MAX_SCHED_RQ (16 * BLKDEV_DEFAULT_RQ)
void blk_mq_sched_assign_ioc(struct request *rq);
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs, struct request **merged_request);
bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
......
......@@ -36,8 +36,6 @@ static void blk_mq_hw_sysfs_release(struct kobject *kobj)
struct blk_mq_hw_ctx *hctx = container_of(kobj, struct blk_mq_hw_ctx,
kobj);
if (hctx->flags & BLK_MQ_F_BLOCKING)
cleanup_srcu_struct(hctx->srcu);
blk_free_flush_queue(hctx->fq);
sbitmap_free(&hctx->ctx_map);
free_cpumask_var(hctx->cpumask);
......
......@@ -215,7 +215,8 @@ void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags)
struct bt_iter_data {
struct blk_mq_hw_ctx *hctx;
busy_iter_fn *fn;
struct request_queue *q;
busy_tag_iter_fn *fn;
void *data;
bool reserved;
};
......@@ -228,7 +229,7 @@ static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
spin_lock_irqsave(&tags->lock, flags);
rq = tags->rqs[bitnr];
if (!rq || rq->tag != bitnr || !refcount_inc_not_zero(&rq->ref))
if (!rq || rq->tag != bitnr || !req_ref_inc_not_zero(rq))
rq = NULL;
spin_unlock_irqrestore(&tags->lock, flags);
return rq;
......@@ -238,11 +239,18 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
struct bt_iter_data *iter_data = data;
struct blk_mq_hw_ctx *hctx = iter_data->hctx;
struct blk_mq_tags *tags = hctx->tags;
struct request_queue *q = iter_data->q;
struct blk_mq_tag_set *set = q->tag_set;
bool reserved = iter_data->reserved;
struct blk_mq_tags *tags;
struct request *rq;
bool ret = true;
if (blk_mq_is_shared_tags(set->flags))
tags = set->shared_tags;
else
tags = hctx->tags;
if (!reserved)
bitnr += tags->nr_reserved_tags;
/*
......@@ -253,8 +261,8 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
if (!rq)
return true;
if (rq->q == hctx->queue && rq->mq_hctx == hctx)
ret = iter_data->fn(hctx, rq, iter_data->data, reserved);
if (rq->q == q && (!hctx || rq->mq_hctx == hctx))
ret = iter_data->fn(rq, iter_data->data, reserved);
blk_mq_put_rq_ref(rq);
return ret;
}
......@@ -262,6 +270,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
/**
* bt_for_each - iterate over the requests associated with a hardware queue
* @hctx: Hardware queue to examine.
* @q: Request queue to examine.
* @bt: sbitmap to examine. This is either the breserved_tags member
* or the bitmap_tags member of struct blk_mq_tags.
* @fn: Pointer to the function that will be called for each request
......@@ -273,14 +282,16 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
* @reserved: Indicates whether @bt is the breserved_tags member or the
* bitmap_tags member of struct blk_mq_tags.
*/
static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
busy_iter_fn *fn, void *data, bool reserved)
static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct request_queue *q,
struct sbitmap_queue *bt, busy_tag_iter_fn *fn,
void *data, bool reserved)
{
struct bt_iter_data iter_data = {
.hctx = hctx,
.fn = fn,
.data = data,
.reserved = reserved,
.q = q,
};
sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
......@@ -457,12 +468,9 @@ EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request);
* called for all requests on all queues that share that tag set and not only
* for requests associated with @q.
*/
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
void *priv)
{
struct blk_mq_hw_ctx *hctx;
int i;
/*
* __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx
* while the queue is frozen. So we can use q_usage_counter to avoid
......@@ -471,19 +479,34 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
if (!percpu_ref_tryget(&q->q_usage_counter))
return;
queue_for_each_hw_ctx(q, hctx, i) {
struct blk_mq_tags *tags = hctx->tags;
/*
* If no software queues are currently mapped to this
* hardware queue, there's nothing to check
*/
if (!blk_mq_hw_queue_mapped(hctx))
continue;
if (blk_mq_is_shared_tags(q->tag_set->flags)) {
struct blk_mq_tags *tags = q->tag_set->shared_tags;
struct sbitmap_queue *bresv = &tags->breserved_tags;
struct sbitmap_queue *btags = &tags->bitmap_tags;
if (tags->nr_reserved_tags)
bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
bt_for_each(NULL, q, bresv, fn, priv, true);
bt_for_each(NULL, q, btags, fn, priv, false);
} else {
struct blk_mq_hw_ctx *hctx;
int i;
queue_for_each_hw_ctx(q, hctx, i) {
struct blk_mq_tags *tags = hctx->tags;
struct sbitmap_queue *bresv = &tags->breserved_tags;
struct sbitmap_queue *btags = &tags->bitmap_tags;
/*
* If no software queues are currently mapped to this
* hardware queue, there's nothing to check
*/
if (!blk_mq_hw_queue_mapped(hctx))
continue;
if (tags->nr_reserved_tags)
bt_for_each(hctx, q, bresv, fn, priv, true);
bt_for_each(hctx, q, btags, fn, priv, false);
}
}
blk_queue_exit(q);
}
......
......@@ -28,7 +28,7 @@ extern void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set,
extern void blk_mq_tag_update_sched_shared_tags(struct request_queue *q);
extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
void *priv);
void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
void *priv);
......
This diff is collapsed.
......@@ -65,9 +65,6 @@ void blk_mq_request_bypass_insert(struct request *rq, bool at_head,
bool run_queue);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list);
/* Used by blk_insert_cloned_request() to issue request directly */
blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last);
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
struct list_head *list);
......@@ -377,5 +374,24 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
return __blk_mq_active_requests(hctx) < depth;
}
/* run the code block in @dispatch_ops with rcu/srcu read lock held */
#define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops) \
do { \
if (!blk_queue_has_srcu(q)) { \
rcu_read_lock(); \
(dispatch_ops); \
rcu_read_unlock(); \
} else { \
int srcu_idx; \
\
might_sleep_if(check_sleep); \
srcu_idx = srcu_read_lock((q)->srcu); \
(dispatch_ops); \
srcu_read_unlock((q)->srcu, srcu_idx); \
} \
} while (0)
#define blk_mq_run_dispatch_ops(q, dispatch_ops) \
__blk_mq_run_dispatch_ops(q, true, dispatch_ops) \
#endif
......@@ -15,7 +15,7 @@
struct blk_queue_stats {
struct list_head callbacks;
spinlock_t lock;
bool enable_accounting;
int accounting;
};
void blk_rq_stat_init(struct blk_rq_stat *stat)
......@@ -161,7 +161,7 @@ void blk_stat_remove_callback(struct request_queue *q,
spin_lock_irqsave(&q->stats->lock, flags);
list_del_rcu(&cb->list);
if (list_empty(&q->stats->callbacks) && !q->stats->enable_accounting)
if (list_empty(&q->stats->callbacks) && !q->stats->accounting)
blk_queue_flag_clear(QUEUE_FLAG_STATS, q);
spin_unlock_irqrestore(&q->stats->lock, flags);
......@@ -184,13 +184,24 @@ void blk_stat_free_callback(struct blk_stat_callback *cb)
call_rcu(&cb->rcu, blk_stat_free_callback_rcu);
}
void blk_stat_disable_accounting(struct request_queue *q)
{
unsigned long flags;
spin_lock_irqsave(&q->stats->lock, flags);
if (!--q->stats->accounting)
blk_queue_flag_clear(QUEUE_FLAG_STATS, q);
spin_unlock_irqrestore(&q->stats->lock, flags);
}
EXPORT_SYMBOL_GPL(blk_stat_disable_accounting);
void blk_stat_enable_accounting(struct request_queue *q)
{
unsigned long flags;
spin_lock_irqsave(&q->stats->lock, flags);
q->stats->enable_accounting = true;
blk_queue_flag_set(QUEUE_FLAG_STATS, q);
if (!q->stats->accounting++)
blk_queue_flag_set(QUEUE_FLAG_STATS, q);
spin_unlock_irqrestore(&q->stats->lock, flags);
}
EXPORT_SYMBOL_GPL(blk_stat_enable_accounting);
......@@ -205,7 +216,7 @@ struct blk_queue_stats *blk_alloc_queue_stats(void)
INIT_LIST_HEAD(&stats->callbacks);
spin_lock_init(&stats->lock);
stats->enable_accounting = false;
stats->accounting = 0;
return stats;
}
......@@ -219,3 +230,21 @@ void blk_free_queue_stats(struct blk_queue_stats *stats)
kfree(stats);
}
bool blk_stats_alloc_enable(struct request_queue *q)
{
struct blk_rq_stat *poll_stat;
poll_stat = kcalloc(BLK_MQ_POLL_STATS_BKTS, sizeof(*poll_stat),
GFP_ATOMIC);
if (!poll_stat)
return false;
if (cmpxchg(&q->poll_stat, NULL, poll_stat) != NULL) {
kfree(poll_stat);
return true;
}
blk_stat_add_callback(q, q->poll_cb);
return false;
}
......@@ -64,11 +64,13 @@ struct blk_stat_callback {
struct blk_queue_stats *blk_alloc_queue_stats(void);
void blk_free_queue_stats(struct blk_queue_stats *);
bool blk_stats_alloc_enable(struct request_queue *q);
void blk_stat_add(struct request *rq, u64 now);
/* record time/size info in request but not add a callback */
void blk_stat_enable_accounting(struct request_queue *q);
void blk_stat_disable_accounting(struct request_queue *q);
/**
* blk_stat_alloc_callback() - Allocate a block statistics callback.
......
......@@ -16,6 +16,7 @@
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-wbt.h"
#include "blk-throttle.h"
......@@ -734,7 +735,8 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
{
struct request_queue *q = container_of(rcu_head, struct request_queue,
rcu_head);
kmem_cache_free(blk_requestq_cachep, q);
kmem_cache_free(blk_get_queue_kmem_cache(blk_queue_has_srcu(q)), q);
}
/* Unconfigure the I/O scheduler and dissociate from the cgroup controller. */
......@@ -747,7 +749,7 @@ static void blk_exit_queue(struct request_queue *q)
*/
if (q->elevator) {
ioc_clear_queue(q);
__elevator_exit(q, q->elevator);
elevator_exit(q);
}
/*
......@@ -785,14 +787,15 @@ static void blk_release_queue(struct kobject *kobj)
might_sleep();
if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
if (q->poll_stat)
blk_stat_remove_callback(q, q->poll_cb);
blk_stat_free_callback(q->poll_cb);
blk_free_queue_stats(q->stats);
blk_exit_queue(q);
blk_free_queue_stats(q->stats);
kfree(q->poll_stat);
blk_queue_free_zone_bitmaps(q);
if (queue_is_mq(q))
......
......@@ -13,6 +13,7 @@
#include <linux/blk-cgroup.h>
#include "blk.h"
#include "blk-cgroup-rwstat.h"
#include "blk-stat.h"
#include "blk-throttle.h"
/* Max dispatch from a group in 1 round */
......
......@@ -2,15 +2,10 @@
#ifndef BLK_INTERNAL_H
#define BLK_INTERNAL_H
#include <linux/idr.h>
#include <linux/blk-mq.h>
#include <linux/part_stat.h>
#include <linux/blk-crypto.h>
#include <linux/memblock.h> /* for max_pfn/max_low_pfn */
#include <xen/xen.h>
#include "blk-crypto-internal.h"
#include "blk-mq.h"
#include "blk-mq-sched.h"
struct elevator_type;
......@@ -32,15 +27,10 @@ struct blk_flush_queue {
};
extern struct kmem_cache *blk_requestq_cachep;
extern struct kmem_cache *blk_requestq_srcu_cachep;
extern struct kobj_type blk_queue_ktype;
extern struct ida blk_queue_ida;
static inline struct blk_flush_queue *
blk_get_flush_queue(struct request_queue *q, struct blk_mq_ctx *ctx)
{
return blk_mq_map_queue(q, REQ_OP_FLUSH, ctx)->fq;
}
static inline void __blk_get_queue(struct request_queue *q)
{
kobject_get(&q->kobj);
......@@ -250,16 +240,13 @@ static inline void blk_integrity_del(struct gendisk *disk)
unsigned long blk_rq_timeout(unsigned long timeout);
void blk_add_timer(struct request *req);
void blk_print_req_error(struct request *req, blk_status_t status);
const char *blk_status_to_str(blk_status_t status);
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs, bool *same_queue_rq);
unsigned int nr_segs);
bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
struct bio *bio, unsigned int nr_segs);
void __blk_account_io_start(struct request *req);
void __blk_account_io_done(struct request *req, u64 now);
/*
* Plug flush limits
*/
......@@ -275,19 +262,10 @@ void blk_insert_flush(struct request *rq);
int elevator_switch_mq(struct request_queue *q,
struct elevator_type *new_e);
void __elevator_exit(struct request_queue *, struct elevator_queue *);
void elevator_exit(struct request_queue *q);
int elv_register_queue(struct request_queue *q, bool uevent);
void elv_unregister_queue(struct request_queue *q);
static inline void elevator_exit(struct request_queue *q,
struct elevator_queue *e)
{
lockdep_assert_held(&q->sysfs_lock);
blk_mq_sched_free_rqs(q);
__elevator_exit(q, e);
}
ssize_t part_size_show(struct device *dev, struct device_attribute *attr,
char *buf);
ssize_t part_stat_show(struct device *dev, struct device_attribute *attr,
......@@ -347,26 +325,10 @@ int blk_dev_init(void);
*/
static inline bool blk_do_io_stat(struct request *rq)
{
return (rq->rq_flags & RQF_IO_STAT) && rq->rq_disk;
}
static inline void blk_account_io_done(struct request *req, u64 now)
{
/*
* Account IO completion. flush_rq isn't accounted as a
* normal IO on queueing nor completion. Accounting the
* containing request is enough.
*/
if (blk_do_io_stat(req) && req->part &&
!(req->rq_flags & RQF_FLUSH_SEQ))
__blk_account_io_done(req, now);
return (rq->rq_flags & RQF_IO_STAT) && rq->q->disk;
}
static inline void blk_account_io_start(struct request *req)
{
if (blk_do_io_stat(req))
__blk_account_io_start(req);
}
void update_io_ticks(struct block_device *part, unsigned long now, bool end);
static inline void req_set_nomerge(struct request_queue *q, struct request *req)
{
......@@ -402,13 +364,15 @@ static inline unsigned int bio_aligned_discard_max_sectors(
/*
* Internal io_context interface
*/
void get_io_context(struct io_context *ioc);
struct io_cq *ioc_lookup_icq(struct io_context *ioc, struct request_queue *q);
struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
gfp_t gfp_mask);
struct io_cq *ioc_find_get_icq(struct request_queue *q);
struct io_cq *ioc_lookup_icq(struct request_queue *q);
#ifdef CONFIG_BLK_ICQ
void ioc_clear_queue(struct request_queue *q);
int create_task_io_context(struct task_struct *task, gfp_t gfp_mask, int node);
#else
static inline void ioc_clear_queue(struct request_queue *q)
{
}
#endif /* CONFIG_BLK_ICQ */
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
extern ssize_t blk_throtl_sample_time_show(struct request_queue *q, char *page);
......@@ -467,7 +431,15 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
unsigned int max_sectors, bool *same_page);
struct request_queue *blk_alloc_queue(int node_id);
static inline struct kmem_cache *blk_get_queue_kmem_cache(bool srcu)
{
if (srcu)
return blk_requestq_srcu_cachep;
return blk_requestq_cachep;
}
struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu);
int disk_scan_partitions(struct gendisk *disk, fmode_t mode);
int disk_alloc_events(struct gendisk *disk);
void disk_add_events(struct gendisk *disk);
......@@ -493,4 +465,45 @@ int disk_register_independent_access_ranges(struct gendisk *disk,
struct blk_independent_access_ranges *new_iars);
void disk_unregister_independent_access_ranges(struct gendisk *disk);
#ifdef CONFIG_FAIL_MAKE_REQUEST
bool should_fail_request(struct block_device *part, unsigned int bytes);
#else /* CONFIG_FAIL_MAKE_REQUEST */
static inline bool should_fail_request(struct block_device *part,
unsigned int bytes)
{
return false;
}
#endif /* CONFIG_FAIL_MAKE_REQUEST */
/*
* Optimized request reference counting. Ideally we'd make timeouts be more
* clever, as that's the only reason we need references at all... But until
* this happens, this is faster than using refcount_t. Also see:
*
* abc54d634334 ("io_uring: switch to atomic_t for io_kiocb reference count")
*/
#define req_ref_zero_or_close_to_overflow(req) \
((unsigned int) atomic_read(&(req->ref)) + 127u <= 127u)
static inline bool req_ref_inc_not_zero(struct request *req)
{
return atomic_inc_not_zero(&req->ref);
}
static inline bool req_ref_put_and_test(struct request *req)
{
WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
return atomic_dec_and_test(&req->ref);
}
static inline void req_ref_set(struct request *req, int value)
{
atomic_set(&req->ref, value);
}
static inline int req_ref_read(struct request *req)
{
return atomic_read(&req->ref);
}
#endif /* BLK_INTERNAL_H */
......@@ -92,7 +92,7 @@ static int bsg_transport_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr,
goto out_unmap_bidi_rq;
bio = rq->bio;
blk_execute_rq(NULL, rq, !(hdr->flags & BSG_FLAG_Q_AT_TAIL));
blk_execute_rq(rq, !(hdr->flags & BSG_FLAG_Q_AT_TAIL));
/*
* The assignments below don't make much sense, but are kept for
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment