Commit 7c24d9f3 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-4.5/core' of git://git.kernel.dk/linux-block

Pull core block updates from Jens Axboe:
 "We don't have a lot of core changes this time around, it's mostly in
  drivers, which will come in a subsequent pull.

  The cores changes include:

   - blk-mq
        - Prep patch from Christoph, changing blk_mq_alloc_request() to
          take flags instead of just using gfp_t for sleep/nosleep.
        - Doc patch from me, clarifying the difference between legacy
          and blk-mq for timer usage.
        - Fixes from Raghavendra for memory-less numa nodes, and a reuse
          of CPU masks.

   - Cleanup from Geliang Tang, using offset_in_page() instead of open
     coding it.

   - From Ilya, rename request_queue slab to it reflects what it holds,
     and a fix for proper use of bdgrab/put.

   - A real fix for the split across stripe boundaries from Keith.  We
     yanked a broken version of this from 4.4-rc final, this one works.

   - From Mike Krinkin, emit a trace message when we split.

   - From Wei Tang, two small cleanups, not explicitly clearing memory
     that is already cleared"

* 'for-4.5/core' of git://git.kernel.dk/linux-block:
  block: use bd{grab,put}() instead of open-coding
  block: split bios to max possible length
  block: add call to split trace point
  blk-mq: Avoid memoryless numa node encoded in hctx numa_node
  blk-mq: Reuse hardware context cpumask for tags
  blk-mq: add a flags parameter to blk_mq_alloc_request
  Revert "blk-flush: Queue through IO scheduler when flush not required"
  block: clarify blk_add_timer() use case for blk-mq
  bio: use offset_in_page macro
  block: do not initialise statics to 0 or NULL
  block: do not initialise globals to 0 or NULL
  block: rename request_queue slab cache
parents 99e38df8 ed8a9d2c
......@@ -1125,7 +1125,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
int i, ret;
int nr_pages = 0;
unsigned int len = iter->count;
unsigned int offset = map_data ? map_data->offset & ~PAGE_MASK : 0;
unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;
for (i = 0; i < iter->nr_segs; i++) {
unsigned long uaddr;
......@@ -1304,7 +1304,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
goto out_unmap;
}
offset = uaddr & ~PAGE_MASK;
offset = offset_in_page(uaddr);
for (j = cur_page; j < page_limit; j++) {
unsigned int bytes = PAGE_SIZE - offset;
......
......@@ -51,7 +51,7 @@ DEFINE_IDA(blk_queue_ida);
/*
* For the allocated request tables
*/
struct kmem_cache *request_cachep = NULL;
struct kmem_cache *request_cachep;
/*
* For queue allocation
......@@ -646,7 +646,7 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
}
EXPORT_SYMBOL(blk_alloc_queue);
int blk_queue_enter(struct request_queue *q, gfp_t gfp)
int blk_queue_enter(struct request_queue *q, bool nowait)
{
while (true) {
int ret;
......@@ -654,7 +654,7 @@ int blk_queue_enter(struct request_queue *q, gfp_t gfp)
if (percpu_ref_tryget_live(&q->q_usage_counter))
return 0;
if (!gfpflags_allow_blocking(gfp))
if (nowait)
return -EBUSY;
ret = wait_event_interruptible(q->mq_freeze_wq,
......@@ -1292,7 +1292,9 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
{
if (q->mq_ops)
return blk_mq_alloc_request(q, rw, gfp_mask, false);
return blk_mq_alloc_request(q, rw,
(gfp_mask & __GFP_DIRECT_RECLAIM) ?
0 : BLK_MQ_REQ_NOWAIT);
else
return blk_old_get_request(q, rw, gfp_mask);
}
......@@ -2060,8 +2062,7 @@ blk_qc_t generic_make_request(struct bio *bio)
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
if (likely(blk_queue_enter(q, __GFP_DIRECT_RECLAIM) == 0)) {
if (likely(blk_queue_enter(q, false) == 0)) {
ret = q->make_request_fn(q, bio);
blk_queue_exit(q);
......@@ -3534,7 +3535,7 @@ int __init blk_dev_init(void)
request_cachep = kmem_cache_create("blkdev_requests",
sizeof(struct request), 0, SLAB_PANIC, NULL);
blk_requestq_cachep = kmem_cache_create("blkdev_queue",
blk_requestq_cachep = kmem_cache_create("request_queue",
sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
return 0;
......
......@@ -7,6 +7,8 @@
#include <linux/blkdev.h>
#include <linux/scatterlist.h>
#include <trace/events/block.h>
#include "blk.h"
static struct bio *blk_bio_discard_split(struct request_queue *q,
......@@ -81,9 +83,6 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
struct bio *new = NULL;
bio_for_each_segment(bv, bio, iter) {
if (sectors + (bv.bv_len >> 9) > queue_max_sectors(q))
goto split;
/*
* If the queue doesn't support SG gaps and adding this
* offset would create a gap, disallow it.
......@@ -91,6 +90,22 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
goto split;
if (sectors + (bv.bv_len >> 9) >
blk_max_size_offset(q, bio->bi_iter.bi_sector)) {
/*
* Consider this a new segment if we're splitting in
* the middle of this vector.
*/
if (nsegs < queue_max_segments(q) &&
sectors < blk_max_size_offset(q,
bio->bi_iter.bi_sector)) {
nsegs++;
sectors = blk_max_size_offset(q,
bio->bi_iter.bi_sector);
}
goto split;
}
if (bvprvp && blk_queue_cluster(q)) {
if (seg_size + bv.bv_len > queue_max_segment_size(q))
goto new_segment;
......@@ -162,6 +177,7 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
split->bi_rw |= REQ_NOMERGE;
bio_chain(split, *bio);
trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
generic_make_request(*bio);
*bio = split;
}
......
......@@ -113,7 +113,7 @@ int blk_mq_hw_queue_to_node(unsigned int *mq_map, unsigned int index)
for_each_possible_cpu(i) {
if (index == mq_map[i])
return cpu_to_node(i);
return local_memory_node(cpu_to_node(i));
}
return NUMA_NO_NODE;
......
......@@ -268,7 +268,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
if (tag != -1)
return tag;
if (!gfpflags_allow_blocking(data->gfp))
if (data->flags & BLK_MQ_REQ_NOWAIT)
return -1;
bs = bt_wait_ptr(bt, hctx);
......@@ -303,7 +303,7 @@ static int bt_get(struct blk_mq_alloc_data *data,
data->ctx = blk_mq_get_ctx(data->q);
data->hctx = data->q->mq_ops->map_queue(data->q,
data->ctx->cpu);
if (data->reserved) {
if (data->flags & BLK_MQ_REQ_RESERVED) {
bt = &data->hctx->tags->breserved_tags;
} else {
last_tag = &data->ctx->last_tag;
......@@ -349,10 +349,9 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_alloc_data *data)
unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
{
if (!data->reserved)
return __blk_mq_get_tag(data);
if (data->flags & BLK_MQ_REQ_RESERVED)
return __blk_mq_get_reserved_tag(data);
return __blk_mq_get_tag(data);
}
static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
......
......@@ -229,8 +229,8 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
return NULL;
}
struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
bool reserved)
struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
unsigned int flags)
{
struct blk_mq_ctx *ctx;
struct blk_mq_hw_ctx *hctx;
......@@ -238,24 +238,22 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
struct blk_mq_alloc_data alloc_data;
int ret;
ret = blk_queue_enter(q, gfp);
ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
if (ret)
return ERR_PTR(ret);
ctx = blk_mq_get_ctx(q);
hctx = q->mq_ops->map_queue(q, ctx->cpu);
blk_mq_set_alloc_data(&alloc_data, q, gfp & ~__GFP_DIRECT_RECLAIM,
reserved, ctx, hctx);
blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
rq = __blk_mq_alloc_request(&alloc_data, rw);
if (!rq && (gfp & __GFP_DIRECT_RECLAIM)) {
if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) {
__blk_mq_run_hw_queue(hctx);
blk_mq_put_ctx(ctx);
ctx = blk_mq_get_ctx(q);
hctx = q->mq_ops->map_queue(q, ctx->cpu);
blk_mq_set_alloc_data(&alloc_data, q, gfp, reserved, ctx,
hctx);
blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
rq = __blk_mq_alloc_request(&alloc_data, rw);
ctx = alloc_data.ctx;
}
......@@ -1175,8 +1173,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
rw |= REQ_SYNC;
trace_block_getrq(q, bio, rw);
blk_mq_set_alloc_data(&alloc_data, q, GFP_ATOMIC, false, ctx,
hctx);
blk_mq_set_alloc_data(&alloc_data, q, BLK_MQ_REQ_NOWAIT, ctx, hctx);
rq = __blk_mq_alloc_request(&alloc_data, rw);
if (unlikely(!rq)) {
__blk_mq_run_hw_queue(hctx);
......@@ -1185,8 +1182,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
ctx = blk_mq_get_ctx(q);
hctx = q->mq_ops->map_queue(q, ctx->cpu);
blk_mq_set_alloc_data(&alloc_data, q,
__GFP_RECLAIM|__GFP_HIGH, false, ctx, hctx);
blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx);
rq = __blk_mq_alloc_request(&alloc_data, rw);
ctx = alloc_data.ctx;
hctx = alloc_data.hctx;
......@@ -1794,7 +1790,7 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
* not, we remain on the home node of the device
*/
if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
hctx->numa_node = cpu_to_node(i);
hctx->numa_node = local_memory_node(cpu_to_node(i));
}
}
......@@ -1854,6 +1850,7 @@ static void blk_mq_map_swqueue(struct request_queue *q,
hctx->tags = set->tags[i];
WARN_ON(!hctx->tags);
cpumask_copy(hctx->tags->cpumask, hctx->cpumask);
/*
* Set the map size to the number of mapped software queues.
* This is more accurate and more efficient than looping
......@@ -1867,14 +1864,6 @@ static void blk_mq_map_swqueue(struct request_queue *q,
hctx->next_cpu = cpumask_first(hctx->cpumask);
hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
}
queue_for_each_ctx(q, ctx, i) {
if (!cpumask_test_cpu(i, online_mask))
continue;
hctx = q->mq_ops->map_queue(q, i);
cpumask_set_cpu(i, hctx->tags->cpumask);
}
}
static void queue_set_hctx_shared(struct request_queue *q, bool shared)
......
......@@ -96,8 +96,7 @@ static inline void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
struct blk_mq_alloc_data {
/* input parameter */
struct request_queue *q;
gfp_t gfp;
bool reserved;
unsigned int flags;
/* input & output parameter */
struct blk_mq_ctx *ctx;
......@@ -105,13 +104,11 @@ struct blk_mq_alloc_data {
};
static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
struct request_queue *q, gfp_t gfp, bool reserved,
struct blk_mq_ctx *ctx,
struct blk_mq_hw_ctx *hctx)
struct request_queue *q, unsigned int flags,
struct blk_mq_ctx *ctx, struct blk_mq_hw_ctx *hctx)
{
data->q = q;
data->gfp = gfp;
data->reserved = reserved;
data->flags = flags;
data->ctx = ctx;
data->hctx = hctx;
}
......
......@@ -186,6 +186,7 @@ unsigned long blk_rq_timeout(unsigned long timeout)
* Notes:
* Each request has its own timer, and as it is added to the queue, we
* set up the timer. When the request completes, we cancel the timer.
* Queue lock must be held for the non-mq case, mq case doesn't care.
*/
void blk_add_timer(struct request *req)
{
......@@ -209,6 +210,11 @@ void blk_add_timer(struct request *req)
req->timeout = q->rq_timeout;
req->deadline = jiffies + req->timeout;
/*
* Only the non-mq case needs to add the request to a protected list.
* For the mq case we simply scan the tag map.
*/
if (!q->mq_ops)
list_add_tail(&req->timeout_list, &req->q->timeout_list);
......
......@@ -1449,7 +1449,7 @@ static DEFINE_MUTEX(disk_events_mutex);
static LIST_HEAD(disk_events);
/* disable in-kernel polling by default */
static unsigned long disk_events_dfl_poll_msecs = 0;
static unsigned long disk_events_dfl_poll_msecs;
static unsigned long disk_events_poll_jiffies(struct gendisk *disk)
{
......
......@@ -173,7 +173,7 @@ static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd)
{
struct request *rq;
rq = blk_mq_alloc_request(dd->queue, 0, __GFP_RECLAIM, true);
rq = blk_mq_alloc_request(dd->queue, 0, BLK_MQ_REQ_RESERVED);
return blk_mq_rq_to_pdu(rq);
}
......
......@@ -449,7 +449,7 @@ static int null_lnvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
struct request *rq;
struct bio *bio = rqd->bio;
rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
rq = blk_mq_alloc_request(q, bio_rw(bio), 0);
if (IS_ERR(rq))
return -ENOMEM;
......
......@@ -71,7 +71,7 @@ static int raw_open(struct inode *inode, struct file *filp)
err = -ENODEV;
if (!bdev)
goto out;
igrab(bdev->bd_inode);
bdgrab(bdev);
err = blkdev_get(bdev, filp->f_mode | FMODE_EXCL, raw_open);
if (err)
goto out;
......
......@@ -471,7 +471,7 @@ static int nvme_nvm_submit_io(struct nvm_dev *dev, struct nvm_rq *rqd)
struct bio *bio = rqd->bio;
struct nvme_nvm_command *cmd;
rq = blk_mq_alloc_request(q, bio_rw(bio), GFP_KERNEL, 0);
rq = blk_mq_alloc_request(q, bio_rw(bio), 0);
if (IS_ERR(rq))
return -ENOMEM;
......
......@@ -1041,7 +1041,7 @@ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
struct request *req;
int ret;
req = blk_mq_alloc_request(q, write, GFP_KERNEL, false);
req = blk_mq_alloc_request(q, write, 0);
if (IS_ERR(req))
return PTR_ERR(req);
......@@ -1094,7 +1094,8 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
struct nvme_cmd_info *cmd_info;
struct request *req;
req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, true);
req = blk_mq_alloc_request(dev->admin_q, WRITE,
BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED);
if (IS_ERR(req))
return PTR_ERR(req);
......@@ -1119,7 +1120,7 @@ static int nvme_submit_admin_async_cmd(struct nvme_dev *dev,
struct request *req;
struct nvme_cmd_info *cmd_rq;
req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false);
req = blk_mq_alloc_request(dev->admin_q, WRITE, 0);
if (IS_ERR(req))
return PTR_ERR(req);
......@@ -1320,8 +1321,8 @@ static void nvme_abort_req(struct request *req)
if (!dev->abort_limit)
return;
abort_req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC,
false);
abort_req = blk_mq_alloc_request(dev->admin_q, WRITE,
BLK_MQ_REQ_NOWAIT);
if (IS_ERR(abort_req))
return;
......
......@@ -400,7 +400,7 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
if (!ops->rw_page || bdev_get_integrity(bdev))
return result;
result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL);
result = blk_queue_enter(bdev->bd_queue, false);
if (result)
return result;
result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
......@@ -437,7 +437,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
if (!ops->rw_page || bdev_get_integrity(bdev))
return -EOPNOTSUPP;
result = blk_queue_enter(bdev->bd_queue, GFP_NOIO);
result = blk_queue_enter(bdev->bd_queue, false);
if (result)
return result;
......@@ -700,7 +700,7 @@ static struct block_device *bd_acquire(struct inode *inode)
spin_lock(&bdev_lock);
bdev = inode->i_bdev;
if (bdev) {
ihold(bdev->bd_inode);
bdgrab(bdev);
spin_unlock(&bdev_lock);
return bdev;
}
......@@ -716,7 +716,7 @@ static struct block_device *bd_acquire(struct inode *inode)
* So, we can access it via ->i_mapping always
* without igrab().
*/
ihold(bdev->bd_inode);
bdgrab(bdev);
inode->i_bdev = bdev;
inode->i_mapping = bdev->bd_inode->i_mapping;
list_add(&inode->i_devices, &bdev->bd_inodes);
......@@ -739,7 +739,7 @@ void bd_forget(struct inode *inode)
spin_unlock(&bdev_lock);
if (bdev)
iput(bdev->bd_inode);
bdput(bdev);
}
/**
......
......@@ -188,8 +188,14 @@ void blk_mq_insert_request(struct request *, bool, bool, bool);
void blk_mq_free_request(struct request *rq);
void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq);
bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
enum {
BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */
BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */
};
struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
gfp_t gfp, bool reserved);
unsigned int flags);
struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags);
......
......@@ -795,7 +795,7 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t,
extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
struct scsi_ioctl_command __user *);
extern int blk_queue_enter(struct request_queue *q, gfp_t gfp);
extern int blk_queue_enter(struct request_queue *q, bool nowait);
extern void blk_queue_exit(struct request_queue *q);
extern void blk_start_queue(struct request_queue *q);
extern void blk_start_queue_async(struct request_queue *q);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment