Commit 64b28683 authored by Linus Torvalds

Merge tag 'for-linus-20180204' of git://git.kernel.dk/linux-block

Pull more block updates from Jens Axboe:
 "Most of this is fixes and not new code/features:

   - skd fix from Arnd, fixing a build error dependent on slab allocator
     type.

   - blk-mq scheduler discard merging fixes, one from me and one from
     Keith. This fixes a segment miscalculation for blk-mq-sched, where
     we mistakenly think two segments are physically contiguous even
     though the request isn't carrying real data. Also fixes a bio-to-rq
     merge case.

   - Don't re-set a bit on the buffer_head flags, if it's already set.
     This can cause scalability concerns on bigger machines and
     workloads. From Kemi Wang.

   - Add BLK_STS_DEV_RESOURCE return value to blk-mq, allowing us to
     distinguish between a local (device related) resource starvation
     and a global one. The latter might happen without IO being in
     flight, so it has to be handled a bit differently. From Ming"

* tag 'for-linus-20180204' of git://git.kernel.dk/linux-block:
  block: skd: fix incorrect linux/slab_def.h inclusion
  buffer: Avoid setting buffer bits that are already set
  blk-mq-sched: Enable merging discard bio into request
  blk-mq: fix discard merge with scheduler attached
  blk-mq: introduce BLK_STS_DEV_RESOURCE
parents d3658c22 1d518775
...@@ -145,6 +145,7 @@ static const struct { ...@@ -145,6 +145,7 @@ static const struct {
[BLK_STS_MEDIUM] = { -ENODATA, "critical medium" }, [BLK_STS_MEDIUM] = { -ENODATA, "critical medium" },
[BLK_STS_PROTECTION] = { -EILSEQ, "protection" }, [BLK_STS_PROTECTION] = { -EILSEQ, "protection" },
[BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" }, [BLK_STS_RESOURCE] = { -ENOMEM, "kernel resource" },
[BLK_STS_DEV_RESOURCE] = { -EBUSY, "device resource" },
[BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" }, [BLK_STS_AGAIN] = { -EAGAIN, "nonblocking retry" },
/* device mapper special case, should not leak out: */ /* device mapper special case, should not leak out: */
...@@ -3282,6 +3283,8 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, ...@@ -3282,6 +3283,8 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
{ {
if (bio_has_data(bio)) if (bio_has_data(bio))
rq->nr_phys_segments = bio_phys_segments(q, bio); rq->nr_phys_segments = bio_phys_segments(q, bio);
else if (bio_op(bio) == REQ_OP_DISCARD)
rq->nr_phys_segments = 1;
rq->__data_len = bio->bi_iter.bi_size; rq->__data_len = bio->bi_iter.bi_size;
rq->bio = rq->biotail = bio; rq->bio = rq->biotail = bio;
......
...@@ -550,6 +550,24 @@ static bool req_no_special_merge(struct request *req) ...@@ -550,6 +550,24 @@ static bool req_no_special_merge(struct request *req)
return !q->mq_ops && req->special; return !q->mq_ops && req->special;
} }
/*
 * Try to fold the discard request @next into the discard request @req.
 *
 * The merge is refused when @req already carries at least
 * queue_max_discard_segments(@q) discard segments, or when the sectors of
 * @req plus the sectors of next->bio would exceed the limit reported by
 * blk_rq_get_max_sectors() for @req at its current position.
 *
 * On refusal @req is passed to req_set_nomerge() and false is returned.
 * On success req->nr_phys_segments is set to the combined discard segment
 * count of @req and @next and true is returned.
 */
static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
		struct request *next)
{
	unsigned short nr_segs = blk_rq_nr_discard_segments(req);

	/*
	 * The segment limit is tested first; the sector limit is only
	 * evaluated when the segment test passes (short-circuit ||).
	 */
	if (nr_segs >= queue_max_discard_segments(q) ||
	    blk_rq_sectors(req) + bio_sectors(next->bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
		req_set_nomerge(q, req);
		return false;
	}

	req->nr_phys_segments = nr_segs + blk_rq_nr_discard_segments(next);
	return true;
}
static int ll_merge_requests_fn(struct request_queue *q, struct request *req, static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
struct request *next) struct request *next)
{ {
...@@ -683,9 +701,13 @@ static struct request *attempt_merge(struct request_queue *q, ...@@ -683,9 +701,13 @@ static struct request *attempt_merge(struct request_queue *q,
* If we are allowed to merge, then append bio list * If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn * from next to rq and release next. merge_requests_fn
* will have updated segment counts, update sector * will have updated segment counts, update sector
* counts here. * counts here. Handle DISCARDs separately, as they
* have separate settings.
*/ */
if (!ll_merge_requests_fn(q, req, next)) if (req_op(req) == REQ_OP_DISCARD) {
if (!req_attempt_discard_merge(q, req, next))
return NULL;
} else if (!ll_merge_requests_fn(q, req, next))
return NULL; return NULL;
/* /*
...@@ -715,7 +737,8 @@ static struct request *attempt_merge(struct request_queue *q, ...@@ -715,7 +737,8 @@ static struct request *attempt_merge(struct request_queue *q,
req->__data_len += blk_rq_bytes(next); req->__data_len += blk_rq_bytes(next);
elv_merge_requests(q, req, next); if (req_op(req) != REQ_OP_DISCARD)
elv_merge_requests(q, req, next);
/* /*
* 'next' is going away, so update stats accordingly * 'next' is going away, so update stats accordingly
......
...@@ -259,6 +259,8 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio, ...@@ -259,6 +259,8 @@ bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
if (!*merged_request) if (!*merged_request)
elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE); elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
return true; return true;
case ELEVATOR_DISCARD_MERGE:
return bio_attempt_discard_merge(q, rq, bio);
default: default:
return false; return false;
} }
......
...@@ -1162,6 +1162,8 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx, ...@@ -1162,6 +1162,8 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx,
return true; return true;
} }
#define BLK_MQ_RESOURCE_DELAY 3 /* ms units */
bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
bool got_budget) bool got_budget)
{ {
...@@ -1169,6 +1171,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, ...@@ -1169,6 +1171,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
struct request *rq, *nxt; struct request *rq, *nxt;
bool no_tag = false; bool no_tag = false;
int errors, queued; int errors, queued;
blk_status_t ret = BLK_STS_OK;
if (list_empty(list)) if (list_empty(list))
return false; return false;
...@@ -1181,7 +1184,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, ...@@ -1181,7 +1184,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
errors = queued = 0; errors = queued = 0;
do { do {
struct blk_mq_queue_data bd; struct blk_mq_queue_data bd;
blk_status_t ret;
rq = list_first_entry(list, struct request, queuelist); rq = list_first_entry(list, struct request, queuelist);
if (!blk_mq_get_driver_tag(rq, &hctx, false)) { if (!blk_mq_get_driver_tag(rq, &hctx, false)) {
...@@ -1226,7 +1228,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, ...@@ -1226,7 +1228,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
} }
ret = q->mq_ops->queue_rq(hctx, &bd); ret = q->mq_ops->queue_rq(hctx, &bd);
if (ret == BLK_STS_RESOURCE) { if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
/* /*
* If an I/O scheduler has been configured and we got a * If an I/O scheduler has been configured and we got a
* driver tag for the next request already, free it * driver tag for the next request already, free it
...@@ -1257,6 +1259,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, ...@@ -1257,6 +1259,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
* that is where we will continue on next queue run. * that is where we will continue on next queue run.
*/ */
if (!list_empty(list)) { if (!list_empty(list)) {
bool needs_restart;
spin_lock(&hctx->lock); spin_lock(&hctx->lock);
list_splice_init(list, &hctx->dispatch); list_splice_init(list, &hctx->dispatch);
spin_unlock(&hctx->lock); spin_unlock(&hctx->lock);
...@@ -1280,10 +1284,17 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, ...@@ -1280,10 +1284,17 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
* - Some but not all block drivers stop a queue before * - Some but not all block drivers stop a queue before
* returning BLK_STS_RESOURCE. Two exceptions are scsi-mq * returning BLK_STS_RESOURCE. Two exceptions are scsi-mq
* and dm-rq. * and dm-rq.
*
* If driver returns BLK_STS_RESOURCE and SCHED_RESTART
* bit is set, run queue after a delay to avoid IO stalls
* that could otherwise occur if the queue is idle.
*/ */
if (!blk_mq_sched_needs_restart(hctx) || needs_restart = blk_mq_sched_needs_restart(hctx);
if (!needs_restart ||
(no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) (no_tag && list_empty_careful(&hctx->dispatch_wait.entry)))
blk_mq_run_hw_queue(hctx, true); blk_mq_run_hw_queue(hctx, true);
else if (needs_restart && (ret == BLK_STS_RESOURCE))
blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY);
} }
return (queued + errors) != 0; return (queued + errors) != 0;
...@@ -1764,6 +1775,7 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx, ...@@ -1764,6 +1775,7 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
*cookie = new_cookie; *cookie = new_cookie;
break; break;
case BLK_STS_RESOURCE: case BLK_STS_RESOURCE:
case BLK_STS_DEV_RESOURCE:
__blk_mq_requeue_request(rq); __blk_mq_requeue_request(rq);
break; break;
default: default:
...@@ -1826,7 +1838,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, ...@@ -1826,7 +1838,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
hctx_lock(hctx, &srcu_idx); hctx_lock(hctx, &srcu_idx);
ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false); ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
if (ret == BLK_STS_RESOURCE) if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
blk_mq_sched_insert_request(rq, false, true, false); blk_mq_sched_insert_request(rq, false, true, false);
else if (ret != BLK_STS_OK) else if (ret != BLK_STS_OK)
blk_mq_end_request(rq, ret); blk_mq_end_request(rq, ret);
......
...@@ -1230,7 +1230,7 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd) ...@@ -1230,7 +1230,7 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
return BLK_STS_OK; return BLK_STS_OK;
} else } else
/* requeue request */ /* requeue request */
return BLK_STS_RESOURCE; return BLK_STS_DEV_RESOURCE;
} }
} }
......
...@@ -32,7 +32,6 @@ ...@@ -32,7 +32,6 @@
#include <linux/aer.h> #include <linux/aer.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/stringify.h> #include <linux/stringify.h>
#include <linux/slab_def.h>
#include <scsi/scsi.h> #include <scsi/scsi.h>
#include <scsi/sg.h> #include <scsi/sg.h>
#include <linux/io.h> #include <linux/io.h>
...@@ -2603,7 +2602,8 @@ static void *skd_alloc_dma(struct skd_device *skdev, struct kmem_cache *s, ...@@ -2603,7 +2602,8 @@ static void *skd_alloc_dma(struct skd_device *skdev, struct kmem_cache *s,
buf = kmem_cache_alloc(s, gfp); buf = kmem_cache_alloc(s, gfp);
if (!buf) if (!buf)
return NULL; return NULL;
*dma_handle = dma_map_single(dev, buf, s->size, dir); *dma_handle = dma_map_single(dev, buf,
kmem_cache_size(s), dir);
if (dma_mapping_error(dev, *dma_handle)) { if (dma_mapping_error(dev, *dma_handle)) {
kmem_cache_free(s, buf); kmem_cache_free(s, buf);
buf = NULL; buf = NULL;
...@@ -2618,7 +2618,8 @@ static void skd_free_dma(struct skd_device *skdev, struct kmem_cache *s, ...@@ -2618,7 +2618,8 @@ static void skd_free_dma(struct skd_device *skdev, struct kmem_cache *s,
if (!vaddr) if (!vaddr)
return; return;
dma_unmap_single(&skdev->pdev->dev, dma_handle, s->size, dir); dma_unmap_single(&skdev->pdev->dev, dma_handle,
kmem_cache_size(s), dir);
kmem_cache_free(s, vaddr); kmem_cache_free(s, vaddr);
} }
......
...@@ -276,7 +276,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -276,7 +276,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
/* Out of mem doesn't actually happen, since we fall back /* Out of mem doesn't actually happen, since we fall back
* to direct descriptors */ * to direct descriptors */
if (err == -ENOMEM || err == -ENOSPC) if (err == -ENOMEM || err == -ENOSPC)
return BLK_STS_RESOURCE; return BLK_STS_DEV_RESOURCE;
return BLK_STS_IOERR; return BLK_STS_IOERR;
} }
......
...@@ -911,7 +911,7 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -911,7 +911,7 @@ static blk_status_t blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
out_busy: out_busy:
blk_mq_stop_hw_queue(hctx); blk_mq_stop_hw_queue(hctx);
spin_unlock_irqrestore(&rinfo->ring_lock, flags); spin_unlock_irqrestore(&rinfo->ring_lock, flags);
return BLK_STS_RESOURCE; return BLK_STS_DEV_RESOURCE;
} }
static void blkif_complete_rq(struct request *rq) static void blkif_complete_rq(struct request *rq)
......
...@@ -408,7 +408,7 @@ static blk_status_t dm_dispatch_clone_request(struct request *clone, struct requ ...@@ -408,7 +408,7 @@ static blk_status_t dm_dispatch_clone_request(struct request *clone, struct requ
clone->start_time = jiffies; clone->start_time = jiffies;
r = blk_insert_cloned_request(clone->q, clone); r = blk_insert_cloned_request(clone->q, clone);
if (r != BLK_STS_OK && r != BLK_STS_RESOURCE) if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE)
/* must complete clone in terms of original request */ /* must complete clone in terms of original request */
dm_complete_request(rq, r); dm_complete_request(rq, r);
return r; return r;
...@@ -500,7 +500,7 @@ static int map_request(struct dm_rq_target_io *tio) ...@@ -500,7 +500,7 @@ static int map_request(struct dm_rq_target_io *tio)
trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
blk_rq_pos(rq)); blk_rq_pos(rq));
ret = dm_dispatch_clone_request(clone, rq); ret = dm_dispatch_clone_request(clone, rq);
if (ret == BLK_STS_RESOURCE) { if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
blk_rq_unprep_clone(clone); blk_rq_unprep_clone(clone);
tio->ti->type->release_clone_rq(clone); tio->ti->type->release_clone_rq(clone);
tio->clone = NULL; tio->clone = NULL;
...@@ -772,7 +772,6 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -772,7 +772,6 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
/* Undo dm_start_request() before requeuing */ /* Undo dm_start_request() before requeuing */
rq_end_stats(md, rq); rq_end_stats(md, rq);
rq_completed(md, rq_data_dir(rq), false); rq_completed(md, rq_data_dir(rq), false);
blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
return BLK_STS_RESOURCE; return BLK_STS_RESOURCE;
} }
......
...@@ -35,8 +35,6 @@ enum nvme_fc_queue_flags { ...@@ -35,8 +35,6 @@ enum nvme_fc_queue_flags {
NVME_FC_Q_LIVE, NVME_FC_Q_LIVE,
}; };
#define NVMEFC_QUEUE_DELAY 3 /* ms units */
#define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */ #define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */
struct nvme_fc_queue { struct nvme_fc_queue {
...@@ -2231,7 +2229,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, ...@@ -2231,7 +2229,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
* the target device is present * the target device is present
*/ */
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
goto busy; return BLK_STS_RESOURCE;
if (!nvme_fc_ctrl_get(ctrl)) if (!nvme_fc_ctrl_get(ctrl))
return BLK_STS_IOERR; return BLK_STS_IOERR;
...@@ -2311,16 +2309,10 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, ...@@ -2311,16 +2309,10 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
ret != -EBUSY) ret != -EBUSY)
return BLK_STS_IOERR; return BLK_STS_IOERR;
goto busy; return BLK_STS_RESOURCE;
} }
return BLK_STS_OK; return BLK_STS_OK;
busy:
if (!(op->flags & FCOP_FLAGS_AEN) && queue->hctx)
blk_mq_delay_run_hw_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
return BLK_STS_RESOURCE;
} }
static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue, static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue,
......
...@@ -2047,9 +2047,9 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -2047,9 +2047,9 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
case BLK_STS_OK: case BLK_STS_OK:
break; break;
case BLK_STS_RESOURCE: case BLK_STS_RESOURCE:
if (atomic_read(&sdev->device_busy) == 0 && if (atomic_read(&sdev->device_busy) ||
!scsi_device_blocked(sdev)) scsi_device_blocked(sdev))
blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY); ret = BLK_STS_DEV_RESOURCE;
break; break;
default: default:
/* /*
......
...@@ -39,6 +39,24 @@ typedef u8 __bitwise blk_status_t; ...@@ -39,6 +39,24 @@ typedef u8 __bitwise blk_status_t;
#define BLK_STS_AGAIN ((__force blk_status_t)12) #define BLK_STS_AGAIN ((__force blk_status_t)12)
/*
* BLK_STS_DEV_RESOURCE is returned from the driver to the block layer if
* device related resources are unavailable, but the driver can guarantee
* that the queue will be rerun in the future once resources become
* available again. This is typically the case for device specific
* resources that are consumed for IO. If the driver fails allocating these
* resources, we know that inflight (or pending) IO will free these
* resources upon completion.
*
* This is different from BLK_STS_RESOURCE in that it explicitly references
* a device specific resource. For resources of wider scope, allocation
* failure can happen without having pending IO. This means that we can't
* rely on request completions freeing these resources, as IO may not be in
* flight. Examples of that are kernel memory allocations, DMA mappings, or
* any other system wide resources.
*/
#define BLK_STS_DEV_RESOURCE ((__force blk_status_t)13)
/** /**
* blk_path_error - returns true if error may be path related * blk_path_error - returns true if error may be path related
* @error: status the request was completed with * @error: status the request was completed with
......
...@@ -81,11 +81,14 @@ struct buffer_head { ...@@ -81,11 +81,14 @@ struct buffer_head {
/* /*
* macro tricks to expand the set_buffer_foo(), clear_buffer_foo() * macro tricks to expand the set_buffer_foo(), clear_buffer_foo()
* and buffer_foo() functions. * and buffer_foo() functions.
* Check the flag before setting it: re-setting a bit that is already set
* still causes a costly cache line transition.
*/ */
#define BUFFER_FNS(bit, name) \ #define BUFFER_FNS(bit, name) \
static __always_inline void set_buffer_##name(struct buffer_head *bh) \ static __always_inline void set_buffer_##name(struct buffer_head *bh) \
{ \ { \
set_bit(BH_##bit, &(bh)->b_state); \ if (!test_bit(BH_##bit, &(bh)->b_state)) \
set_bit(BH_##bit, &(bh)->b_state); \
} \ } \
static __always_inline void clear_buffer_##name(struct buffer_head *bh) \ static __always_inline void clear_buffer_##name(struct buffer_head *bh) \
{ \ { \
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment