Commit 5e57dc81 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block IO fixes from Jens Axboe:
 "Second round of updates and fixes for 3.14-rc2.  Most of this stuff
  has been queued up for a while.  The notable exception is the blk-mq
  changes, which are naturally a bit more in flux still.

  The pull request contains:

   - Two bug fixes for the new immutable vecs, causing crashes with raid
     or swap.  From Kent.

   - Various blk-mq tweaks and fixes from Christoph.  A fix for
     integrity bios from Nic.

   - A few bcache fixes from Kent and Darrick Wong.

   - xen-blk{front,back} fixes from David Vrabel, Matt Rushton, Nicolas
     Swenson, and Roger Pau Monne.

   - Fix for a vec miscount with integrity vectors from Martin.

   - Minor annotations or fixes from Masanari Iida and Rashika Kheria.

   - Tweak to null_blk to do more normal FIFO processing of requests
     from Shlomo Pongratz.

   - Elevator switching bypass fix from Tejun.

   - Fix for a softlockup in blkdev_issue_discard() when !CONFIG_PREEMPT,
     from me"

* 'for-linus' of git://git.kernel.dk/linux-block: (31 commits)
  block: add cond_resched() to potentially long running ioctl discard loop
  xen-blkback: init persistent_purge_work work_struct
  blk-mq: pair blk_mq_start_request / blk_mq_requeue_request
  blk-mq: dont assume rq->errors is set when returning an error from ->queue_rq
  block: Fix cloning of discard/write same bios
  block: Fix type mismatch in ssize_t_blk_mq_tag_sysfs_show
  blk-mq: rework flush sequencing logic
  null_blk: use blk_complete_request and blk_mq_complete_request
  virtio_blk: use blk_mq_complete_request
  blk-mq: rework I/O completions
  fs: Add prototype declaration to appropriate header file include/linux/bio.h
  fs: Mark function as static in fs/bio-integrity.c
  block/null_blk: Fix completion processing from LIFO to FIFO
  block: Explicitly handle discard/write same segments
  block: Fix nr_vecs for inline integrity vectors
  blk-mq: Add bio_integrity setup to blk_mq_make_request
  blk-mq: initialize sg_reserved_size
  blk-mq: handle dma_drain_size
  blk-mq: divert __blk_put_request for MQ ops
  blk-mq: support at_head inserations for blk_execute_rq
  ...
parents 0d25e369 c8123f8c
...@@ -693,11 +693,20 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) ...@@ -693,11 +693,20 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
if (!uninit_q) if (!uninit_q)
return NULL; return NULL;
uninit_q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
if (!uninit_q->flush_rq)
goto out_cleanup_queue;
q = blk_init_allocated_queue(uninit_q, rfn, lock); q = blk_init_allocated_queue(uninit_q, rfn, lock);
if (!q) if (!q)
blk_cleanup_queue(uninit_q); goto out_free_flush_rq;
return q; return q;
out_free_flush_rq:
kfree(uninit_q->flush_rq);
out_cleanup_queue:
blk_cleanup_queue(uninit_q);
return NULL;
} }
EXPORT_SYMBOL(blk_init_queue_node); EXPORT_SYMBOL(blk_init_queue_node);
...@@ -1127,7 +1136,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, ...@@ -1127,7 +1136,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
{ {
if (q->mq_ops) if (q->mq_ops)
return blk_mq_alloc_request(q, rw, gfp_mask, false); return blk_mq_alloc_request(q, rw, gfp_mask);
else else
return blk_old_get_request(q, rw, gfp_mask); return blk_old_get_request(q, rw, gfp_mask);
} }
...@@ -1278,6 +1287,11 @@ void __blk_put_request(struct request_queue *q, struct request *req) ...@@ -1278,6 +1287,11 @@ void __blk_put_request(struct request_queue *q, struct request *req)
if (unlikely(!q)) if (unlikely(!q))
return; return;
if (q->mq_ops) {
blk_mq_free_request(req);
return;
}
blk_pm_put_request(req); blk_pm_put_request(req);
elv_completed_request(q, req); elv_completed_request(q, req);
......
...@@ -65,7 +65,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, ...@@ -65,7 +65,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
* be resued after dying flag is set * be resued after dying flag is set
*/ */
if (q->mq_ops) { if (q->mq_ops) {
blk_mq_insert_request(q, rq, true); blk_mq_insert_request(q, rq, at_head, true);
return; return;
} }
......
...@@ -130,20 +130,26 @@ static void blk_flush_restore_request(struct request *rq) ...@@ -130,20 +130,26 @@ static void blk_flush_restore_request(struct request *rq)
blk_clear_rq_complete(rq); blk_clear_rq_complete(rq);
} }
static void mq_flush_data_run(struct work_struct *work) static void mq_flush_run(struct work_struct *work)
{ {
struct request *rq; struct request *rq;
rq = container_of(work, struct request, mq_flush_data); rq = container_of(work, struct request, mq_flush_work);
memset(&rq->csd, 0, sizeof(rq->csd)); memset(&rq->csd, 0, sizeof(rq->csd));
blk_mq_run_request(rq, true, false); blk_mq_run_request(rq, true, false);
} }
static void blk_mq_flush_data_insert(struct request *rq) static bool blk_flush_queue_rq(struct request *rq)
{ {
INIT_WORK(&rq->mq_flush_data, mq_flush_data_run); if (rq->q->mq_ops) {
kblockd_schedule_work(rq->q, &rq->mq_flush_data); INIT_WORK(&rq->mq_flush_work, mq_flush_run);
kblockd_schedule_work(rq->q, &rq->mq_flush_work);
return false;
} else {
list_add_tail(&rq->queuelist, &rq->q->queue_head);
return true;
}
} }
/** /**
...@@ -187,12 +193,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, ...@@ -187,12 +193,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
case REQ_FSEQ_DATA: case REQ_FSEQ_DATA:
list_move_tail(&rq->flush.list, &q->flush_data_in_flight); list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
if (q->mq_ops) queued = blk_flush_queue_rq(rq);
blk_mq_flush_data_insert(rq);
else {
list_add(&rq->queuelist, &q->queue_head);
queued = true;
}
break; break;
case REQ_FSEQ_DONE: case REQ_FSEQ_DONE:
...@@ -216,9 +217,6 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, ...@@ -216,9 +217,6 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
} }
kicked = blk_kick_flush(q); kicked = blk_kick_flush(q);
/* blk_mq_run_flush will run queue */
if (q->mq_ops)
return queued;
return kicked | queued; return kicked | queued;
} }
...@@ -230,10 +228,9 @@ static void flush_end_io(struct request *flush_rq, int error) ...@@ -230,10 +228,9 @@ static void flush_end_io(struct request *flush_rq, int error)
struct request *rq, *n; struct request *rq, *n;
unsigned long flags = 0; unsigned long flags = 0;
if (q->mq_ops) { if (q->mq_ops)
blk_mq_free_request(flush_rq);
spin_lock_irqsave(&q->mq_flush_lock, flags); spin_lock_irqsave(&q->mq_flush_lock, flags);
}
running = &q->flush_queue[q->flush_running_idx]; running = &q->flush_queue[q->flush_running_idx];
BUG_ON(q->flush_pending_idx == q->flush_running_idx); BUG_ON(q->flush_pending_idx == q->flush_running_idx);
...@@ -263,49 +260,14 @@ static void flush_end_io(struct request *flush_rq, int error) ...@@ -263,49 +260,14 @@ static void flush_end_io(struct request *flush_rq, int error)
* kblockd. * kblockd.
*/ */
if (queued || q->flush_queue_delayed) { if (queued || q->flush_queue_delayed) {
if (!q->mq_ops) WARN_ON(q->mq_ops);
blk_run_queue_async(q); blk_run_queue_async(q);
else
/*
* This can be optimized to only run queues with requests
* queued if necessary.
*/
blk_mq_run_queues(q, true);
} }
q->flush_queue_delayed = 0; q->flush_queue_delayed = 0;
if (q->mq_ops) if (q->mq_ops)
spin_unlock_irqrestore(&q->mq_flush_lock, flags); spin_unlock_irqrestore(&q->mq_flush_lock, flags);
} }
static void mq_flush_work(struct work_struct *work)
{
struct request_queue *q;
struct request *rq;
q = container_of(work, struct request_queue, mq_flush_work);
/* We don't need set REQ_FLUSH_SEQ, it's for consistency */
rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ,
__GFP_WAIT|GFP_ATOMIC, true);
rq->cmd_type = REQ_TYPE_FS;
rq->end_io = flush_end_io;
blk_mq_run_request(rq, true, false);
}
/*
* We can't directly use q->flush_rq, because it doesn't have tag and is not in
* hctx->rqs[]. so we must allocate a new request, since we can't sleep here,
* so offload the work to workqueue.
*
* Note: we assume a flush request finished in any hardware queue will flush
* the whole disk cache.
*/
static void mq_run_flush(struct request_queue *q)
{
kblockd_schedule_work(q, &q->mq_flush_work);
}
/** /**
* blk_kick_flush - consider issuing flush request * blk_kick_flush - consider issuing flush request
* @q: request_queue being kicked * @q: request_queue being kicked
...@@ -340,19 +302,31 @@ static bool blk_kick_flush(struct request_queue *q) ...@@ -340,19 +302,31 @@ static bool blk_kick_flush(struct request_queue *q)
* different from running_idx, which means flush is in flight. * different from running_idx, which means flush is in flight.
*/ */
q->flush_pending_idx ^= 1; q->flush_pending_idx ^= 1;
if (q->mq_ops) { if (q->mq_ops) {
mq_run_flush(q); struct blk_mq_ctx *ctx = first_rq->mq_ctx;
return true; struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu);
blk_mq_rq_init(hctx, q->flush_rq);
q->flush_rq->mq_ctx = ctx;
/*
* Reuse the tag value from the first waiting request,
* with blk-mq the tag is generated during request
* allocation and drivers can rely on it being inside
* the range they asked for.
*/
q->flush_rq->tag = first_rq->tag;
} else {
blk_rq_init(q, q->flush_rq);
} }
blk_rq_init(q, &q->flush_rq); q->flush_rq->cmd_type = REQ_TYPE_FS;
q->flush_rq.cmd_type = REQ_TYPE_FS; q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; q->flush_rq->rq_disk = first_rq->rq_disk;
q->flush_rq.rq_disk = first_rq->rq_disk; q->flush_rq->end_io = flush_end_io;
q->flush_rq.end_io = flush_end_io;
list_add_tail(&q->flush_rq.queuelist, &q->queue_head); return blk_flush_queue_rq(q->flush_rq);
return true;
} }
static void flush_data_end_io(struct request *rq, int error) static void flush_data_end_io(struct request *rq, int error)
...@@ -558,5 +532,4 @@ EXPORT_SYMBOL(blkdev_issue_flush); ...@@ -558,5 +532,4 @@ EXPORT_SYMBOL(blkdev_issue_flush);
void blk_mq_init_flush(struct request_queue *q) void blk_mq_init_flush(struct request_queue *q)
{ {
spin_lock_init(&q->mq_flush_lock); spin_lock_init(&q->mq_flush_lock);
INIT_WORK(&q->mq_flush_work, mq_flush_work);
} }
...@@ -119,6 +119,14 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, ...@@ -119,6 +119,14 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
atomic_inc(&bb.done); atomic_inc(&bb.done);
submit_bio(type, bio); submit_bio(type, bio);
/*
* We can loop for a long time in here, if someone does
* full device discards (like mkfs). Be nice and allow
* us to schedule out to avoid softlocking if preempt
* is disabled.
*/
cond_resched();
} }
blk_finish_plug(&plug); blk_finish_plug(&plug);
......
...@@ -21,6 +21,16 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, ...@@ -21,6 +21,16 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
if (!bio) if (!bio)
return 0; return 0;
/*
* This should probably be returning 0, but blk_add_request_payload()
* (Christoph!!!!)
*/
if (bio->bi_rw & REQ_DISCARD)
return 1;
if (bio->bi_rw & REQ_WRITE_SAME)
return 1;
fbio = bio; fbio = bio;
cluster = blk_queue_cluster(q); cluster = blk_queue_cluster(q);
seg_size = 0; seg_size = 0;
...@@ -161,30 +171,60 @@ __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec, ...@@ -161,30 +171,60 @@ __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
*bvprv = *bvec; *bvprv = *bvec;
} }
/* static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
* map a request to scatterlist, return number of sg entries setup. Caller struct scatterlist *sglist,
* must make sure sg can hold rq->nr_phys_segments entries struct scatterlist **sg)
*/
int blk_rq_map_sg(struct request_queue *q, struct request *rq,
struct scatterlist *sglist)
{ {
struct bio_vec bvec, bvprv = { NULL }; struct bio_vec bvec, bvprv = { NULL };
struct req_iterator iter; struct bvec_iter iter;
struct scatterlist *sg;
int nsegs, cluster; int nsegs, cluster;
nsegs = 0; nsegs = 0;
cluster = blk_queue_cluster(q); cluster = blk_queue_cluster(q);
if (bio->bi_rw & REQ_DISCARD) {
/* /*
* for each bio in rq * This is a hack - drivers should be neither modifying the
* biovec, nor relying on bi_vcnt - but because of
* blk_add_request_payload(), a discard bio may or may not have
* a payload we need to set up here (thank you Christoph) and
* bi_vcnt is really the only way of telling if we need to.
*/ */
sg = NULL;
rq_for_each_segment(bvec, rq, iter) { if (bio->bi_vcnt)
__blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg, goto single_segment;
return 0;
}
if (bio->bi_rw & REQ_WRITE_SAME) {
single_segment:
*sg = sglist;
bvec = bio_iovec(bio);
sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
return 1;
}
for_each_bio(bio)
bio_for_each_segment(bvec, bio, iter)
__blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg,
&nsegs, &cluster); &nsegs, &cluster);
} /* segments in rq */
return nsegs;
}
/*
* map a request to scatterlist, return number of sg entries setup. Caller
* must make sure sg can hold rq->nr_phys_segments entries
*/
int blk_rq_map_sg(struct request_queue *q, struct request *rq,
struct scatterlist *sglist)
{
struct scatterlist *sg = NULL;
int nsegs = 0;
if (rq->bio)
nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);
if (unlikely(rq->cmd_flags & REQ_COPY_USER) && if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
(blk_rq_bytes(rq) & q->dma_pad_mask)) { (blk_rq_bytes(rq) & q->dma_pad_mask)) {
...@@ -230,20 +270,13 @@ EXPORT_SYMBOL(blk_rq_map_sg); ...@@ -230,20 +270,13 @@ EXPORT_SYMBOL(blk_rq_map_sg);
int blk_bio_map_sg(struct request_queue *q, struct bio *bio, int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
struct scatterlist *sglist) struct scatterlist *sglist)
{ {
struct bio_vec bvec, bvprv = { NULL }; struct scatterlist *sg = NULL;
struct scatterlist *sg; int nsegs;
int nsegs, cluster; struct bio *next = bio->bi_next;
struct bvec_iter iter; bio->bi_next = NULL;
nsegs = 0;
cluster = blk_queue_cluster(q);
sg = NULL;
bio_for_each_segment(bvec, bio, iter) {
__blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
&nsegs, &cluster);
} /* segments in bio */
nsegs = __blk_bios_map_sg(q, bio, sglist, &sg);
bio->bi_next = next;
if (sg) if (sg)
sg_mark_end(sg); sg_mark_end(sg);
......
...@@ -184,7 +184,7 @@ void blk_mq_free_tags(struct blk_mq_tags *tags) ...@@ -184,7 +184,7 @@ void blk_mq_free_tags(struct blk_mq_tags *tags)
ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
{ {
char *orig_page = page; char *orig_page = page;
int cpu; unsigned int cpu;
if (!tags) if (!tags)
return 0; return 0;
......
...@@ -226,15 +226,14 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, ...@@ -226,15 +226,14 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
return rq; return rq;
} }
struct request *blk_mq_alloc_request(struct request_queue *q, int rw, struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp)
gfp_t gfp, bool reserved)
{ {
struct request *rq; struct request *rq;
if (blk_mq_queue_enter(q)) if (blk_mq_queue_enter(q))
return NULL; return NULL;
rq = blk_mq_alloc_request_pinned(q, rw, gfp, reserved); rq = blk_mq_alloc_request_pinned(q, rw, gfp, false);
if (rq) if (rq)
blk_mq_put_ctx(rq->mq_ctx); blk_mq_put_ctx(rq->mq_ctx);
return rq; return rq;
...@@ -258,7 +257,7 @@ EXPORT_SYMBOL(blk_mq_alloc_reserved_request); ...@@ -258,7 +257,7 @@ EXPORT_SYMBOL(blk_mq_alloc_reserved_request);
/* /*
* Re-init and set pdu, if we have it * Re-init and set pdu, if we have it
*/ */
static void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq)
{ {
blk_rq_init(hctx->queue, rq); blk_rq_init(hctx->queue, rq);
...@@ -305,7 +304,7 @@ static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error) ...@@ -305,7 +304,7 @@ static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error)
bio_endio(bio, error); bio_endio(bio, error);
} }
void blk_mq_complete_request(struct request *rq, int error) void blk_mq_end_io(struct request *rq, int error)
{ {
struct bio *bio = rq->bio; struct bio *bio = rq->bio;
unsigned int bytes = 0; unsigned int bytes = 0;
...@@ -330,48 +329,55 @@ void blk_mq_complete_request(struct request *rq, int error) ...@@ -330,48 +329,55 @@ void blk_mq_complete_request(struct request *rq, int error)
else else
blk_mq_free_request(rq); blk_mq_free_request(rq);
} }
EXPORT_SYMBOL(blk_mq_end_io);
void __blk_mq_end_io(struct request *rq, int error) static void __blk_mq_complete_request_remote(void *data)
{
if (!blk_mark_rq_complete(rq))
blk_mq_complete_request(rq, error);
}
static void blk_mq_end_io_remote(void *data)
{ {
struct request *rq = data; struct request *rq = data;
__blk_mq_end_io(rq, rq->errors); rq->q->softirq_done_fn(rq);
} }
/* void __blk_mq_complete_request(struct request *rq)
* End IO on this request on a multiqueue enabled driver. We'll either do
* it directly inline, or punt to a local IPI handler on the matching
* remote CPU.
*/
void blk_mq_end_io(struct request *rq, int error)
{ {
struct blk_mq_ctx *ctx = rq->mq_ctx; struct blk_mq_ctx *ctx = rq->mq_ctx;
int cpu; int cpu;
if (!ctx->ipi_redirect) if (!ctx->ipi_redirect) {
return __blk_mq_end_io(rq, error); rq->q->softirq_done_fn(rq);
return;
}
cpu = get_cpu(); cpu = get_cpu();
if (cpu != ctx->cpu && cpu_online(ctx->cpu)) { if (cpu != ctx->cpu && cpu_online(ctx->cpu)) {
rq->errors = error; rq->csd.func = __blk_mq_complete_request_remote;
rq->csd.func = blk_mq_end_io_remote;
rq->csd.info = rq; rq->csd.info = rq;
rq->csd.flags = 0; rq->csd.flags = 0;
__smp_call_function_single(ctx->cpu, &rq->csd, 0); __smp_call_function_single(ctx->cpu, &rq->csd, 0);
} else { } else {
__blk_mq_end_io(rq, error); rq->q->softirq_done_fn(rq);
} }
put_cpu(); put_cpu();
} }
EXPORT_SYMBOL(blk_mq_end_io);
static void blk_mq_start_request(struct request *rq) /**
* blk_mq_complete_request - end I/O on a request
* @rq: the request being processed
*
* Description:
* Ends all I/O on a request. It does not handle partial completions.
* The actual completion happens out-of-order, through an IPI handler.
**/
void blk_mq_complete_request(struct request *rq)
{
if (unlikely(blk_should_fake_timeout(rq->q)))
return;
if (!blk_mark_rq_complete(rq))
__blk_mq_complete_request(rq);
}
EXPORT_SYMBOL(blk_mq_complete_request);
static void blk_mq_start_request(struct request *rq, bool last)
{ {
struct request_queue *q = rq->q; struct request_queue *q = rq->q;
...@@ -384,6 +390,25 @@ static void blk_mq_start_request(struct request *rq) ...@@ -384,6 +390,25 @@ static void blk_mq_start_request(struct request *rq)
*/ */
rq->deadline = jiffies + q->rq_timeout; rq->deadline = jiffies + q->rq_timeout;
set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); set_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
if (q->dma_drain_size && blk_rq_bytes(rq)) {
/*
* Make sure space for the drain appears. We know we can do
* this because max_hw_segments has been adjusted to be one
* fewer than the device can handle.
*/
rq->nr_phys_segments++;
}
/*
* Flag the last request in the series so that drivers know when IO
* should be kicked off, if they don't do it on a per-request basis.
*
* Note: the flag isn't the only condition drivers should do kick off.
* If drive is busy, the last request might not have the bit set.
*/
if (last)
rq->cmd_flags |= REQ_END;
} }
static void blk_mq_requeue_request(struct request *rq) static void blk_mq_requeue_request(struct request *rq)
...@@ -392,6 +417,11 @@ static void blk_mq_requeue_request(struct request *rq) ...@@ -392,6 +417,11 @@ static void blk_mq_requeue_request(struct request *rq)
trace_block_rq_requeue(q, rq); trace_block_rq_requeue(q, rq);
clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
rq->cmd_flags &= ~REQ_END;
if (q->dma_drain_size && blk_rq_bytes(rq))
rq->nr_phys_segments--;
} }
struct blk_mq_timeout_data { struct blk_mq_timeout_data {
...@@ -559,19 +589,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) ...@@ -559,19 +589,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
rq = list_first_entry(&rq_list, struct request, queuelist); rq = list_first_entry(&rq_list, struct request, queuelist);
list_del_init(&rq->queuelist); list_del_init(&rq->queuelist);
blk_mq_start_request(rq);
/* blk_mq_start_request(rq, list_empty(&rq_list));
* Last request in the series. Flag it as such, this
* enables drivers to know when IO should be kicked off,
* if they don't do it on a per-request basis.
*
* Note: the flag isn't the only condition drivers
* should do kick off. If drive is busy, the last
* request might not have the bit set.
*/
if (list_empty(&rq_list))
rq->cmd_flags |= REQ_END;
ret = q->mq_ops->queue_rq(hctx, rq); ret = q->mq_ops->queue_rq(hctx, rq);
switch (ret) { switch (ret) {
...@@ -589,8 +608,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) ...@@ -589,8 +608,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
break; break;
default: default:
pr_err("blk-mq: bad return on queue: %d\n", ret); pr_err("blk-mq: bad return on queue: %d\n", ret);
rq->errors = -EIO;
case BLK_MQ_RQ_QUEUE_ERROR: case BLK_MQ_RQ_QUEUE_ERROR:
rq->errors = -EIO;
blk_mq_end_io(rq, rq->errors); blk_mq_end_io(rq, rq->errors);
break; break;
} }
...@@ -693,12 +712,15 @@ static void blk_mq_work_fn(struct work_struct *work) ...@@ -693,12 +712,15 @@ static void blk_mq_work_fn(struct work_struct *work)
} }
static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
struct request *rq) struct request *rq, bool at_head)
{ {
struct blk_mq_ctx *ctx = rq->mq_ctx; struct blk_mq_ctx *ctx = rq->mq_ctx;
trace_block_rq_insert(hctx->queue, rq); trace_block_rq_insert(hctx->queue, rq);
if (at_head)
list_add(&rq->queuelist, &ctx->rq_list);
else
list_add_tail(&rq->queuelist, &ctx->rq_list); list_add_tail(&rq->queuelist, &ctx->rq_list);
blk_mq_hctx_mark_pending(hctx, ctx); blk_mq_hctx_mark_pending(hctx, ctx);
...@@ -709,7 +731,7 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, ...@@ -709,7 +731,7 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
} }
void blk_mq_insert_request(struct request_queue *q, struct request *rq, void blk_mq_insert_request(struct request_queue *q, struct request *rq,
bool run_queue) bool at_head, bool run_queue)
{ {
struct blk_mq_hw_ctx *hctx; struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx, *current_ctx; struct blk_mq_ctx *ctx, *current_ctx;
...@@ -728,7 +750,7 @@ void blk_mq_insert_request(struct request_queue *q, struct request *rq, ...@@ -728,7 +750,7 @@ void blk_mq_insert_request(struct request_queue *q, struct request *rq,
rq->mq_ctx = ctx; rq->mq_ctx = ctx;
} }
spin_lock(&ctx->lock); spin_lock(&ctx->lock);
__blk_mq_insert_request(hctx, rq); __blk_mq_insert_request(hctx, rq, at_head);
spin_unlock(&ctx->lock); spin_unlock(&ctx->lock);
blk_mq_put_ctx(current_ctx); blk_mq_put_ctx(current_ctx);
...@@ -760,7 +782,7 @@ void blk_mq_run_request(struct request *rq, bool run_queue, bool async) ...@@ -760,7 +782,7 @@ void blk_mq_run_request(struct request *rq, bool run_queue, bool async)
/* ctx->cpu might be offline */ /* ctx->cpu might be offline */
spin_lock(&ctx->lock); spin_lock(&ctx->lock);
__blk_mq_insert_request(hctx, rq); __blk_mq_insert_request(hctx, rq, false);
spin_unlock(&ctx->lock); spin_unlock(&ctx->lock);
blk_mq_put_ctx(current_ctx); blk_mq_put_ctx(current_ctx);
...@@ -798,7 +820,7 @@ static void blk_mq_insert_requests(struct request_queue *q, ...@@ -798,7 +820,7 @@ static void blk_mq_insert_requests(struct request_queue *q,
rq = list_first_entry(list, struct request, queuelist); rq = list_first_entry(list, struct request, queuelist);
list_del_init(&rq->queuelist); list_del_init(&rq->queuelist);
rq->mq_ctx = ctx; rq->mq_ctx = ctx;
__blk_mq_insert_request(hctx, rq); __blk_mq_insert_request(hctx, rq, false);
} }
spin_unlock(&ctx->lock); spin_unlock(&ctx->lock);
...@@ -888,6 +910,11 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) ...@@ -888,6 +910,11 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
blk_queue_bounce(q, &bio); blk_queue_bounce(q, &bio);
if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
bio_endio(bio, -EIO);
return;
}
if (use_plug && blk_attempt_plug_merge(q, bio, &request_count)) if (use_plug && blk_attempt_plug_merge(q, bio, &request_count))
return; return;
...@@ -950,7 +977,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) ...@@ -950,7 +977,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
__blk_mq_free_request(hctx, ctx, rq); __blk_mq_free_request(hctx, ctx, rq);
else { else {
blk_mq_bio_to_request(rq, bio); blk_mq_bio_to_request(rq, bio);
__blk_mq_insert_request(hctx, rq); __blk_mq_insert_request(hctx, rq, false);
} }
spin_unlock(&ctx->lock); spin_unlock(&ctx->lock);
...@@ -1309,15 +1336,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, ...@@ -1309,15 +1336,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
reg->queue_depth = BLK_MQ_MAX_DEPTH; reg->queue_depth = BLK_MQ_MAX_DEPTH;
} }
/*
* Set aside a tag for flush requests. It will only be used while
* another flush request is in progress but outside the driver.
*
* TODO: only allocate if flushes are supported
*/
reg->queue_depth++;
reg->reserved_tags++;
if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN)) if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN))
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
...@@ -1360,17 +1378,27 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, ...@@ -1360,17 +1378,27 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
q->mq_ops = reg->ops; q->mq_ops = reg->ops;
q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
q->sg_reserved_size = INT_MAX;
blk_queue_make_request(q, blk_mq_make_request); blk_queue_make_request(q, blk_mq_make_request);
blk_queue_rq_timed_out(q, reg->ops->timeout); blk_queue_rq_timed_out(q, reg->ops->timeout);
if (reg->timeout) if (reg->timeout)
blk_queue_rq_timeout(q, reg->timeout); blk_queue_rq_timeout(q, reg->timeout);
if (reg->ops->complete)
blk_queue_softirq_done(q, reg->ops->complete);
blk_mq_init_flush(q); blk_mq_init_flush(q);
blk_mq_init_cpu_queues(q, reg->nr_hw_queues); blk_mq_init_cpu_queues(q, reg->nr_hw_queues);
if (blk_mq_init_hw_queues(q, reg, driver_data)) q->flush_rq = kzalloc(round_up(sizeof(struct request) + reg->cmd_size,
cache_line_size()), GFP_KERNEL);
if (!q->flush_rq)
goto err_hw; goto err_hw;
if (blk_mq_init_hw_queues(q, reg, driver_data))
goto err_flush_rq;
blk_mq_map_swqueue(q); blk_mq_map_swqueue(q);
mutex_lock(&all_q_mutex); mutex_lock(&all_q_mutex);
...@@ -1378,6 +1406,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, ...@@ -1378,6 +1406,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
mutex_unlock(&all_q_mutex); mutex_unlock(&all_q_mutex);
return q; return q;
err_flush_rq:
kfree(q->flush_rq);
err_hw: err_hw:
kfree(q->mq_map); kfree(q->mq_map);
err_map: err_map:
......
...@@ -22,13 +22,13 @@ struct blk_mq_ctx { ...@@ -22,13 +22,13 @@ struct blk_mq_ctx {
struct kobject kobj; struct kobject kobj;
}; };
void __blk_mq_end_io(struct request *rq, int error); void __blk_mq_complete_request(struct request *rq);
void blk_mq_complete_request(struct request *rq, int error);
void blk_mq_run_request(struct request *rq, bool run_queue, bool async); void blk_mq_run_request(struct request *rq, bool run_queue, bool async);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_init_flush(struct request_queue *q); void blk_mq_init_flush(struct request_queue *q);
void blk_mq_drain_queue(struct request_queue *q); void blk_mq_drain_queue(struct request_queue *q);
void blk_mq_free_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q);
void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq);
/* /*
* CPU hotplug helpers * CPU hotplug helpers
......
...@@ -549,6 +549,8 @@ static void blk_release_queue(struct kobject *kobj) ...@@ -549,6 +549,8 @@ static void blk_release_queue(struct kobject *kobj)
if (q->mq_ops) if (q->mq_ops)
blk_mq_free_queue(q); blk_mq_free_queue(q);
kfree(q->flush_rq);
blk_trace_shutdown(q); blk_trace_shutdown(q);
bdi_destroy(&q->backing_dev_info); bdi_destroy(&q->backing_dev_info);
......
...@@ -91,7 +91,7 @@ static void blk_rq_timed_out(struct request *req) ...@@ -91,7 +91,7 @@ static void blk_rq_timed_out(struct request *req)
case BLK_EH_HANDLED: case BLK_EH_HANDLED:
/* Can we use req->errors here? */ /* Can we use req->errors here? */
if (q->mq_ops) if (q->mq_ops)
blk_mq_complete_request(req, req->errors); __blk_mq_complete_request(req);
else else
__blk_complete_request(req); __blk_complete_request(req);
break; break;
......
...@@ -113,7 +113,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) ...@@ -113,7 +113,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
q->flush_queue_delayed = 1; q->flush_queue_delayed = 1;
return NULL; return NULL;
} }
if (unlikely(blk_queue_dying(q)) || if (unlikely(blk_queue_bypass(q)) ||
!q->elevator->type->ops.elevator_dispatch_fn(q, 0)) !q->elevator->type->ops.elevator_dispatch_fn(q, 0))
return NULL; return NULL;
} }
......
...@@ -60,7 +60,9 @@ enum { ...@@ -60,7 +60,9 @@ enum {
NULL_IRQ_NONE = 0, NULL_IRQ_NONE = 0,
NULL_IRQ_SOFTIRQ = 1, NULL_IRQ_SOFTIRQ = 1,
NULL_IRQ_TIMER = 2, NULL_IRQ_TIMER = 2,
};
enum {
NULL_Q_BIO = 0, NULL_Q_BIO = 0,
NULL_Q_RQ = 1, NULL_Q_RQ = 1,
NULL_Q_MQ = 2, NULL_Q_MQ = 2,
...@@ -172,17 +174,19 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) ...@@ -172,17 +174,19 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
static void end_cmd(struct nullb_cmd *cmd) static void end_cmd(struct nullb_cmd *cmd)
{ {
if (cmd->rq) { switch (queue_mode) {
if (queue_mode == NULL_Q_MQ) case NULL_Q_MQ:
blk_mq_end_io(cmd->rq, 0); blk_mq_end_io(cmd->rq, 0);
else { return;
case NULL_Q_RQ:
INIT_LIST_HEAD(&cmd->rq->queuelist); INIT_LIST_HEAD(&cmd->rq->queuelist);
blk_end_request_all(cmd->rq, 0); blk_end_request_all(cmd->rq, 0);
} break;
} else if (cmd->bio) case NULL_Q_BIO:
bio_endio(cmd->bio, 0); bio_endio(cmd->bio, 0);
break;
}
if (queue_mode != NULL_Q_MQ)
free_cmd(cmd); free_cmd(cmd);
} }
...@@ -195,6 +199,7 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) ...@@ -195,6 +199,7 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
cq = &per_cpu(completion_queues, smp_processor_id()); cq = &per_cpu(completion_queues, smp_processor_id());
while ((entry = llist_del_all(&cq->list)) != NULL) { while ((entry = llist_del_all(&cq->list)) != NULL) {
entry = llist_reverse_order(entry);
do { do {
cmd = container_of(entry, struct nullb_cmd, ll_list); cmd = container_of(entry, struct nullb_cmd, ll_list);
end_cmd(cmd); end_cmd(cmd);
...@@ -221,61 +226,31 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd) ...@@ -221,61 +226,31 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
static void null_softirq_done_fn(struct request *rq) static void null_softirq_done_fn(struct request *rq)
{ {
blk_end_request_all(rq, 0); end_cmd(rq->special);
}
#ifdef CONFIG_SMP
static void null_ipi_cmd_end_io(void *data)
{
struct completion_queue *cq;
struct llist_node *entry, *next;
struct nullb_cmd *cmd;
cq = &per_cpu(completion_queues, smp_processor_id());
entry = llist_del_all(&cq->list);
while (entry) {
next = entry->next;
cmd = llist_entry(entry, struct nullb_cmd, ll_list);
end_cmd(cmd);
entry = next;
}
}
static void null_cmd_end_ipi(struct nullb_cmd *cmd)
{
struct call_single_data *data = &cmd->csd;
int cpu = get_cpu();
struct completion_queue *cq = &per_cpu(completion_queues, cpu);
cmd->ll_list.next = NULL;
if (llist_add(&cmd->ll_list, &cq->list)) {
data->func = null_ipi_cmd_end_io;
data->flags = 0;
__smp_call_function_single(cpu, data, 0);
}
put_cpu();
} }
#endif /* CONFIG_SMP */
static inline void null_handle_cmd(struct nullb_cmd *cmd) static inline void null_handle_cmd(struct nullb_cmd *cmd)
{ {
/* Complete IO by inline, softirq or timer */ /* Complete IO by inline, softirq or timer */
switch (irqmode) { switch (irqmode) {
case NULL_IRQ_NONE: case NULL_IRQ_SOFTIRQ:
switch (queue_mode) {
case NULL_Q_MQ:
blk_mq_complete_request(cmd->rq);
break;
case NULL_Q_RQ:
blk_complete_request(cmd->rq);
break;
case NULL_Q_BIO:
/*
* XXX: no proper submitting cpu information available.
*/
end_cmd(cmd); end_cmd(cmd);
break; break;
case NULL_IRQ_SOFTIRQ: }
#ifdef CONFIG_SMP break;
null_cmd_end_ipi(cmd); case NULL_IRQ_NONE:
#else
end_cmd(cmd); end_cmd(cmd);
#endif
break; break;
case NULL_IRQ_TIMER: case NULL_IRQ_TIMER:
null_cmd_end_timer(cmd); null_cmd_end_timer(cmd);
...@@ -411,6 +386,7 @@ static struct blk_mq_ops null_mq_ops = { ...@@ -411,6 +386,7 @@ static struct blk_mq_ops null_mq_ops = {
.queue_rq = null_queue_rq, .queue_rq = null_queue_rq,
.map_queue = blk_mq_map_queue, .map_queue = blk_mq_map_queue,
.init_hctx = null_init_hctx, .init_hctx = null_init_hctx,
.complete = null_softirq_done_fn,
}; };
static struct blk_mq_reg null_mq_reg = { static struct blk_mq_reg null_mq_reg = {
...@@ -609,13 +585,6 @@ static int __init null_init(void) ...@@ -609,13 +585,6 @@ static int __init null_init(void)
{ {
unsigned int i; unsigned int i;
#if !defined(CONFIG_SMP)
if (irqmode == NULL_IRQ_SOFTIRQ) {
pr_warn("null_blk: softirq completions not available.\n");
pr_warn("null_blk: using direct completions.\n");
irqmode = NULL_IRQ_NONE;
}
#endif
if (bs > PAGE_SIZE) { if (bs > PAGE_SIZE) {
pr_warn("null_blk: invalid block size\n"); pr_warn("null_blk: invalid block size\n");
pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE); pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
......
...@@ -110,9 +110,9 @@ static int __virtblk_add_req(struct virtqueue *vq, ...@@ -110,9 +110,9 @@ static int __virtblk_add_req(struct virtqueue *vq,
return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
} }
static inline void virtblk_request_done(struct virtblk_req *vbr) static inline void virtblk_request_done(struct request *req)
{ {
struct request *req = vbr->req; struct virtblk_req *vbr = req->special;
int error = virtblk_result(vbr); int error = virtblk_result(vbr);
if (req->cmd_type == REQ_TYPE_BLOCK_PC) { if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
...@@ -138,7 +138,7 @@ static void virtblk_done(struct virtqueue *vq) ...@@ -138,7 +138,7 @@ static void virtblk_done(struct virtqueue *vq)
do { do {
virtqueue_disable_cb(vq); virtqueue_disable_cb(vq);
while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
virtblk_request_done(vbr); blk_mq_complete_request(vbr->req);
req_done = true; req_done = true;
} }
if (unlikely(virtqueue_is_broken(vq))) if (unlikely(virtqueue_is_broken(vq)))
...@@ -479,6 +479,7 @@ static struct blk_mq_ops virtio_mq_ops = { ...@@ -479,6 +479,7 @@ static struct blk_mq_ops virtio_mq_ops = {
.map_queue = blk_mq_map_queue, .map_queue = blk_mq_map_queue,
.alloc_hctx = blk_mq_alloc_single_hw_queue, .alloc_hctx = blk_mq_alloc_single_hw_queue,
.free_hctx = blk_mq_free_single_hw_queue, .free_hctx = blk_mq_free_single_hw_queue,
.complete = virtblk_request_done,
}; };
static struct blk_mq_reg virtio_mq_reg = { static struct blk_mq_reg virtio_mq_reg = {
......
...@@ -299,7 +299,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, ...@@ -299,7 +299,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
BUG_ON(num != 0); BUG_ON(num != 0);
} }
static void unmap_purged_grants(struct work_struct *work) void xen_blkbk_unmap_purged_grants(struct work_struct *work)
{ {
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
...@@ -375,7 +375,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif) ...@@ -375,7 +375,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean); pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean);
INIT_LIST_HEAD(&blkif->persistent_purge_list); BUG_ON(!list_empty(&blkif->persistent_purge_list));
root = &blkif->persistent_gnts; root = &blkif->persistent_gnts;
purge_list: purge_list:
foreach_grant_safe(persistent_gnt, n, root, node) { foreach_grant_safe(persistent_gnt, n, root, node) {
...@@ -420,7 +420,6 @@ static void purge_persistent_gnt(struct xen_blkif *blkif) ...@@ -420,7 +420,6 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
blkif->vbd.overflow_max_grants = 0; blkif->vbd.overflow_max_grants = 0;
/* We can defer this work */ /* We can defer this work */
INIT_WORK(&blkif->persistent_purge_work, unmap_purged_grants);
schedule_work(&blkif->persistent_purge_work); schedule_work(&blkif->persistent_purge_work);
pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total); pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total);
return; return;
...@@ -625,9 +624,23 @@ int xen_blkif_schedule(void *arg) ...@@ -625,9 +624,23 @@ int xen_blkif_schedule(void *arg)
print_stats(blkif); print_stats(blkif);
} }
/* Since we are shutting down remove all pages from the buffer */ /* Drain pending purge work */
shrink_free_pagepool(blkif, 0 /* All */); flush_work(&blkif->persistent_purge_work);
if (log_stats)
print_stats(blkif);
blkif->xenblkd = NULL;
xen_blkif_put(blkif);
return 0;
}
/*
* Remove persistent grants and empty the pool of free pages
*/
void xen_blkbk_free_caches(struct xen_blkif *blkif)
{
/* Free all persistent grant pages */ /* Free all persistent grant pages */
if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
free_persistent_gnts(blkif, &blkif->persistent_gnts, free_persistent_gnts(blkif, &blkif->persistent_gnts,
...@@ -636,13 +649,8 @@ int xen_blkif_schedule(void *arg) ...@@ -636,13 +649,8 @@ int xen_blkif_schedule(void *arg)
BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
blkif->persistent_gnt_c = 0; blkif->persistent_gnt_c = 0;
if (log_stats) /* Since we are shutting down remove all pages from the buffer */
print_stats(blkif); shrink_free_pagepool(blkif, 0 /* All */);
blkif->xenblkd = NULL;
xen_blkif_put(blkif);
return 0;
} }
/* /*
...@@ -838,7 +846,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req, ...@@ -838,7 +846,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
struct grant_page **pages = pending_req->indirect_pages; struct grant_page **pages = pending_req->indirect_pages;
struct xen_blkif *blkif = pending_req->blkif; struct xen_blkif *blkif = pending_req->blkif;
int indirect_grefs, rc, n, nseg, i; int indirect_grefs, rc, n, nseg, i;
struct blkif_request_segment_aligned *segments = NULL; struct blkif_request_segment *segments = NULL;
nseg = pending_req->nr_pages; nseg = pending_req->nr_pages;
indirect_grefs = INDIRECT_PAGES(nseg); indirect_grefs = INDIRECT_PAGES(nseg);
...@@ -934,9 +942,7 @@ static void xen_blk_drain_io(struct xen_blkif *blkif) ...@@ -934,9 +942,7 @@ static void xen_blk_drain_io(struct xen_blkif *blkif)
{ {
atomic_set(&blkif->drain, 1); atomic_set(&blkif->drain, 1);
do { do {
/* The initial value is one, and one refcnt taken at the if (atomic_read(&blkif->inflight) == 0)
* start of the xen_blkif_schedule thread. */
if (atomic_read(&blkif->refcnt) <= 2)
break; break;
wait_for_completion_interruptible_timeout( wait_for_completion_interruptible_timeout(
&blkif->drain_complete, HZ); &blkif->drain_complete, HZ);
...@@ -976,17 +982,30 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) ...@@ -976,17 +982,30 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
* the proper response on the ring. * the proper response on the ring.
*/ */
if (atomic_dec_and_test(&pending_req->pendcnt)) { if (atomic_dec_and_test(&pending_req->pendcnt)) {
xen_blkbk_unmap(pending_req->blkif, struct xen_blkif *blkif = pending_req->blkif;
xen_blkbk_unmap(blkif,
pending_req->segments, pending_req->segments,
pending_req->nr_pages); pending_req->nr_pages);
make_response(pending_req->blkif, pending_req->id, make_response(blkif, pending_req->id,
pending_req->operation, pending_req->status); pending_req->operation, pending_req->status);
xen_blkif_put(pending_req->blkif); free_req(blkif, pending_req);
if (atomic_read(&pending_req->blkif->refcnt) <= 2) { /*
if (atomic_read(&pending_req->blkif->drain)) * Make sure the request is freed before releasing blkif,
complete(&pending_req->blkif->drain_complete); * or there could be a race between free_req and the
* cleanup done in xen_blkif_free during shutdown.
*
* NB: We might try to wake up pending_free_wq before
* drain_complete (in case there's a drain going on). This is
* not a problem with our current implementation, because we
* can be sure no thread is waiting on pending_free_wq while
* a drain is going on, but it has to be taken into account
* if the current model is changed.
*/
if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
complete(&blkif->drain_complete);
} }
free_req(pending_req->blkif, pending_req); xen_blkif_put(blkif);
} }
} }
...@@ -1240,6 +1259,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, ...@@ -1240,6 +1259,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
* below (in "!bio") if we are handling a BLKIF_OP_DISCARD. * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
*/ */
xen_blkif_get(blkif); xen_blkif_get(blkif);
atomic_inc(&blkif->inflight);
for (i = 0; i < nseg; i++) { for (i = 0; i < nseg; i++) {
while ((bio == NULL) || while ((bio == NULL) ||
......
...@@ -57,7 +57,7 @@ ...@@ -57,7 +57,7 @@
#define MAX_INDIRECT_SEGMENTS 256 #define MAX_INDIRECT_SEGMENTS 256
#define SEGS_PER_INDIRECT_FRAME \ #define SEGS_PER_INDIRECT_FRAME \
(PAGE_SIZE/sizeof(struct blkif_request_segment_aligned)) (PAGE_SIZE/sizeof(struct blkif_request_segment))
#define MAX_INDIRECT_PAGES \ #define MAX_INDIRECT_PAGES \
((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
#define INDIRECT_PAGES(_segs) \ #define INDIRECT_PAGES(_segs) \
...@@ -278,6 +278,7 @@ struct xen_blkif { ...@@ -278,6 +278,7 @@ struct xen_blkif {
/* for barrier (drain) requests */ /* for barrier (drain) requests */
struct completion drain_complete; struct completion drain_complete;
atomic_t drain; atomic_t drain;
atomic_t inflight;
/* One thread per one blkif. */ /* One thread per one blkif. */
struct task_struct *xenblkd; struct task_struct *xenblkd;
unsigned int waiting_reqs; unsigned int waiting_reqs;
...@@ -376,6 +377,7 @@ int xen_blkif_xenbus_init(void); ...@@ -376,6 +377,7 @@ int xen_blkif_xenbus_init(void);
irqreturn_t xen_blkif_be_int(int irq, void *dev_id); irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
int xen_blkif_schedule(void *arg); int xen_blkif_schedule(void *arg);
int xen_blkif_purge_persistent(void *arg); int xen_blkif_purge_persistent(void *arg);
void xen_blkbk_free_caches(struct xen_blkif *blkif);
int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
struct backend_info *be, int state); struct backend_info *be, int state);
...@@ -383,6 +385,7 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, ...@@ -383,6 +385,7 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
int xen_blkbk_barrier(struct xenbus_transaction xbt, int xen_blkbk_barrier(struct xenbus_transaction xbt,
struct backend_info *be, int state); struct backend_info *be, int state);
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
void xen_blkbk_unmap_purged_grants(struct work_struct *work);
static inline void blkif_get_x86_32_req(struct blkif_request *dst, static inline void blkif_get_x86_32_req(struct blkif_request *dst,
struct blkif_x86_32_request *src) struct blkif_x86_32_request *src)
......
...@@ -125,8 +125,11 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) ...@@ -125,8 +125,11 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
blkif->persistent_gnts.rb_node = NULL; blkif->persistent_gnts.rb_node = NULL;
spin_lock_init(&blkif->free_pages_lock); spin_lock_init(&blkif->free_pages_lock);
INIT_LIST_HEAD(&blkif->free_pages); INIT_LIST_HEAD(&blkif->free_pages);
INIT_LIST_HEAD(&blkif->persistent_purge_list);
blkif->free_pages_num = 0; blkif->free_pages_num = 0;
atomic_set(&blkif->persistent_gnt_in_use, 0); atomic_set(&blkif->persistent_gnt_in_use, 0);
atomic_set(&blkif->inflight, 0);
INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants);
INIT_LIST_HEAD(&blkif->pending_free); INIT_LIST_HEAD(&blkif->pending_free);
...@@ -259,6 +262,17 @@ static void xen_blkif_free(struct xen_blkif *blkif) ...@@ -259,6 +262,17 @@ static void xen_blkif_free(struct xen_blkif *blkif)
if (!atomic_dec_and_test(&blkif->refcnt)) if (!atomic_dec_and_test(&blkif->refcnt))
BUG(); BUG();
/* Remove all persistent grants and the cache of ballooned pages. */
xen_blkbk_free_caches(blkif);
/* Make sure everything is drained before shutting down */
BUG_ON(blkif->persistent_gnt_c != 0);
BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0);
BUG_ON(blkif->free_pages_num != 0);
BUG_ON(!list_empty(&blkif->persistent_purge_list));
BUG_ON(!list_empty(&blkif->free_pages));
BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
/* Check that there is no request in use */ /* Check that there is no request in use */
list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
list_del(&req->free_list); list_del(&req->free_list);
......
...@@ -162,7 +162,7 @@ static DEFINE_SPINLOCK(minor_lock); ...@@ -162,7 +162,7 @@ static DEFINE_SPINLOCK(minor_lock);
#define DEV_NAME "xvd" /* name in /dev */ #define DEV_NAME "xvd" /* name in /dev */
#define SEGS_PER_INDIRECT_FRAME \ #define SEGS_PER_INDIRECT_FRAME \
(PAGE_SIZE/sizeof(struct blkif_request_segment_aligned)) (PAGE_SIZE/sizeof(struct blkif_request_segment))
#define INDIRECT_GREFS(_segs) \ #define INDIRECT_GREFS(_segs) \
((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
...@@ -393,7 +393,7 @@ static int blkif_queue_request(struct request *req) ...@@ -393,7 +393,7 @@ static int blkif_queue_request(struct request *req)
unsigned long id; unsigned long id;
unsigned int fsect, lsect; unsigned int fsect, lsect;
int i, ref, n; int i, ref, n;
struct blkif_request_segment_aligned *segments = NULL; struct blkif_request_segment *segments = NULL;
/* /*
* Used to store if we are able to queue the request by just using * Used to store if we are able to queue the request by just using
...@@ -550,7 +550,7 @@ static int blkif_queue_request(struct request *req) ...@@ -550,7 +550,7 @@ static int blkif_queue_request(struct request *req)
} else { } else {
n = i % SEGS_PER_INDIRECT_FRAME; n = i % SEGS_PER_INDIRECT_FRAME;
segments[n] = segments[n] =
(struct blkif_request_segment_aligned) { (struct blkif_request_segment) {
.gref = ref, .gref = ref,
.first_sect = fsect, .first_sect = fsect,
.last_sect = lsect }; .last_sect = lsect };
...@@ -1904,13 +1904,16 @@ static void blkback_changed(struct xenbus_device *dev, ...@@ -1904,13 +1904,16 @@ static void blkback_changed(struct xenbus_device *dev,
case XenbusStateReconfiguring: case XenbusStateReconfiguring:
case XenbusStateReconfigured: case XenbusStateReconfigured:
case XenbusStateUnknown: case XenbusStateUnknown:
case XenbusStateClosed:
break; break;
case XenbusStateConnected: case XenbusStateConnected:
blkfront_connect(info); blkfront_connect(info);
break; break;
case XenbusStateClosed:
if (dev->state == XenbusStateClosed)
break;
/* Missed the backend's Closing state -- fallthrough */
case XenbusStateClosing: case XenbusStateClosing:
blkfront_closing(info); blkfront_closing(info);
break; break;
......
...@@ -210,7 +210,9 @@ BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2); ...@@ -210,7 +210,9 @@ BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2);
#define GC_MARK_RECLAIMABLE 0 #define GC_MARK_RECLAIMABLE 0
#define GC_MARK_DIRTY 1 #define GC_MARK_DIRTY 1
#define GC_MARK_METADATA 2 #define GC_MARK_METADATA 2
BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 13); #define GC_SECTORS_USED_SIZE 13
#define MAX_GC_SECTORS_USED (~(~0ULL << GC_SECTORS_USED_SIZE))
BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE);
BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1); BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1);
#include "journal.h" #include "journal.h"
......
...@@ -23,7 +23,7 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set) ...@@ -23,7 +23,7 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
for (k = i->start; k < bset_bkey_last(i); k = next) { for (k = i->start; k < bset_bkey_last(i); k = next) {
next = bkey_next(k); next = bkey_next(k);
printk(KERN_ERR "block %u key %zi/%u: ", set, printk(KERN_ERR "block %u key %li/%u: ", set,
(uint64_t *) k - i->d, i->keys); (uint64_t *) k - i->d, i->keys);
if (b->ops->key_dump) if (b->ops->key_dump)
...@@ -1185,9 +1185,12 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter, ...@@ -1185,9 +1185,12 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO, struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO,
order); order);
if (!out) { if (!out) {
struct page *outp;
BUG_ON(order > state->page_order); BUG_ON(order > state->page_order);
out = page_address(mempool_alloc(state->pool, GFP_NOIO)); outp = mempool_alloc(state->pool, GFP_NOIO);
out = page_address(outp);
used_mempool = true; used_mempool = true;
order = state->page_order; order = state->page_order;
} }
......
...@@ -1167,7 +1167,7 @@ uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k) ...@@ -1167,7 +1167,7 @@ uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k)
/* guard against overflow */ /* guard against overflow */
SET_GC_SECTORS_USED(g, min_t(unsigned, SET_GC_SECTORS_USED(g, min_t(unsigned,
GC_SECTORS_USED(g) + KEY_SIZE(k), GC_SECTORS_USED(g) + KEY_SIZE(k),
(1 << 14) - 1)); MAX_GC_SECTORS_USED));
BUG_ON(!GC_SECTORS_USED(g)); BUG_ON(!GC_SECTORS_USED(g));
} }
...@@ -1805,7 +1805,7 @@ static bool btree_insert_key(struct btree *b, struct bkey *k, ...@@ -1805,7 +1805,7 @@ static bool btree_insert_key(struct btree *b, struct bkey *k,
static size_t insert_u64s_remaining(struct btree *b) static size_t insert_u64s_remaining(struct btree *b)
{ {
ssize_t ret = bch_btree_keys_u64s_remaining(&b->keys); long ret = bch_btree_keys_u64s_remaining(&b->keys);
/* /*
* Might land in the middle of an existing extent and have to split it * Might land in the middle of an existing extent and have to split it
......
...@@ -353,14 +353,14 @@ static void bch_data_insert_start(struct closure *cl) ...@@ -353,14 +353,14 @@ static void bch_data_insert_start(struct closure *cl)
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
struct bio *bio = op->bio, *n; struct bio *bio = op->bio, *n;
if (op->bypass)
return bch_data_invalidate(cl);
if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) { if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) {
set_gc_sectors(op->c); set_gc_sectors(op->c);
wake_up_gc(op->c); wake_up_gc(op->c);
} }
if (op->bypass)
return bch_data_invalidate(cl);
/* /*
* Journal writes are marked REQ_FLUSH; if the original write was a * Journal writes are marked REQ_FLUSH; if the original write was a
* flush, it'll wait on the journal write. * flush, it'll wait on the journal write.
......
...@@ -416,7 +416,7 @@ static int btree_bset_stats(struct btree_op *b_op, struct btree *b) ...@@ -416,7 +416,7 @@ static int btree_bset_stats(struct btree_op *b_op, struct btree *b)
return MAP_CONTINUE; return MAP_CONTINUE;
} }
int bch_bset_print_stats(struct cache_set *c, char *buf) static int bch_bset_print_stats(struct cache_set *c, char *buf)
{ {
struct bset_stats_op op; struct bset_stats_op op;
int ret; int ret;
......
...@@ -114,6 +114,14 @@ void bio_integrity_free(struct bio *bio) ...@@ -114,6 +114,14 @@ void bio_integrity_free(struct bio *bio)
} }
EXPORT_SYMBOL(bio_integrity_free); EXPORT_SYMBOL(bio_integrity_free);
static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip)
{
if (bip->bip_slab == BIO_POOL_NONE)
return BIP_INLINE_VECS;
return bvec_nr_vecs(bip->bip_slab);
}
/** /**
* bio_integrity_add_page - Attach integrity metadata * bio_integrity_add_page - Attach integrity metadata
* @bio: bio to update * @bio: bio to update
...@@ -129,7 +137,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, ...@@ -129,7 +137,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_integrity_payload *bip = bio->bi_integrity;
struct bio_vec *iv; struct bio_vec *iv;
if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) { if (bip->bip_vcnt >= bip_integrity_vecs(bip)) {
printk(KERN_ERR "%s: bip_vec full\n", __func__); printk(KERN_ERR "%s: bip_vec full\n", __func__);
return 0; return 0;
} }
...@@ -226,7 +234,8 @@ unsigned int bio_integrity_tag_size(struct bio *bio) ...@@ -226,7 +234,8 @@ unsigned int bio_integrity_tag_size(struct bio *bio)
} }
EXPORT_SYMBOL(bio_integrity_tag_size); EXPORT_SYMBOL(bio_integrity_tag_size);
int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set) static int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len,
int set)
{ {
struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_integrity_payload *bip = bio->bi_integrity;
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
......
...@@ -611,7 +611,6 @@ EXPORT_SYMBOL(bio_clone_fast); ...@@ -611,7 +611,6 @@ EXPORT_SYMBOL(bio_clone_fast);
struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
struct bio_set *bs) struct bio_set *bs)
{ {
unsigned nr_iovecs = 0;
struct bvec_iter iter; struct bvec_iter iter;
struct bio_vec bv; struct bio_vec bv;
struct bio *bio; struct bio *bio;
...@@ -638,10 +637,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, ...@@ -638,10 +637,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
* __bio_clone_fast() anyways. * __bio_clone_fast() anyways.
*/ */
bio_for_each_segment(bv, bio_src, iter) bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
nr_iovecs++;
bio = bio_alloc_bioset(gfp_mask, nr_iovecs, bs);
if (!bio) if (!bio)
return NULL; return NULL;
...@@ -650,9 +646,18 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, ...@@ -650,9 +646,18 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
if (bio->bi_rw & REQ_DISCARD)
goto integrity_clone;
if (bio->bi_rw & REQ_WRITE_SAME) {
bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
goto integrity_clone;
}
bio_for_each_segment(bv, bio_src, iter) bio_for_each_segment(bv, bio_src, iter)
bio->bi_io_vec[bio->bi_vcnt++] = bv; bio->bi_io_vec[bio->bi_vcnt++] = bv;
integrity_clone:
if (bio_integrity(bio_src)) { if (bio_integrity(bio_src)) {
int ret; int ret;
......
...@@ -250,6 +250,17 @@ static inline unsigned bio_segments(struct bio *bio) ...@@ -250,6 +250,17 @@ static inline unsigned bio_segments(struct bio *bio)
struct bio_vec bv; struct bio_vec bv;
struct bvec_iter iter; struct bvec_iter iter;
/*
* We special case discard/write same, because they interpret bi_size
* differently:
*/
if (bio->bi_rw & REQ_DISCARD)
return 1;
if (bio->bi_rw & REQ_WRITE_SAME)
return 1;
bio_for_each_segment(bv, bio, iter) bio_for_each_segment(bv, bio, iter)
segs++; segs++;
...@@ -332,6 +343,7 @@ extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); ...@@ -332,6 +343,7 @@ extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
extern struct bio_set *fs_bio_set; extern struct bio_set *fs_bio_set;
unsigned int bio_integrity_tag_size(struct bio *bio);
static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
{ {
......
...@@ -83,6 +83,8 @@ struct blk_mq_ops { ...@@ -83,6 +83,8 @@ struct blk_mq_ops {
*/ */
rq_timed_out_fn *timeout; rq_timed_out_fn *timeout;
softirq_done_fn *complete;
/* /*
* Override for hctx allocations (should probably go) * Override for hctx allocations (should probably go)
*/ */
...@@ -119,11 +121,12 @@ void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struc ...@@ -119,11 +121,12 @@ void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struc
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
void blk_mq_insert_request(struct request_queue *, struct request *, bool); void blk_mq_insert_request(struct request_queue *, struct request *,
bool, bool);
void blk_mq_run_queues(struct request_queue *q, bool async); void blk_mq_run_queues(struct request_queue *q, bool async);
void blk_mq_free_request(struct request *rq); void blk_mq_free_request(struct request *rq);
bool blk_mq_can_queue(struct blk_mq_hw_ctx *); bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, bool reserved); struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp);
struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp); struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp);
struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag); struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag);
...@@ -133,6 +136,8 @@ void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); ...@@ -133,6 +136,8 @@ void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int);
void blk_mq_end_io(struct request *rq, int error); void blk_mq_end_io(struct request *rq, int error);
void blk_mq_complete_request(struct request *rq);
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_stop_hw_queues(struct request_queue *q); void blk_mq_stop_hw_queues(struct request_queue *q);
......
...@@ -98,7 +98,7 @@ struct request { ...@@ -98,7 +98,7 @@ struct request {
struct list_head queuelist; struct list_head queuelist;
union { union {
struct call_single_data csd; struct call_single_data csd;
struct work_struct mq_flush_data; struct work_struct mq_flush_work;
}; };
struct request_queue *q; struct request_queue *q;
...@@ -448,13 +448,8 @@ struct request_queue { ...@@ -448,13 +448,8 @@ struct request_queue {
unsigned long flush_pending_since; unsigned long flush_pending_since;
struct list_head flush_queue[2]; struct list_head flush_queue[2];
struct list_head flush_data_in_flight; struct list_head flush_data_in_flight;
union { struct request *flush_rq;
struct request flush_rq;
struct {
spinlock_t mq_flush_lock; spinlock_t mq_flush_lock;
struct work_struct mq_flush_work;
};
};
struct mutex sysfs_lock; struct mutex sysfs_lock;
......
...@@ -113,13 +113,13 @@ typedef uint64_t blkif_sector_t; ...@@ -113,13 +113,13 @@ typedef uint64_t blkif_sector_t;
* it's less than the number provided by the backend. The indirect_grefs field * it's less than the number provided by the backend. The indirect_grefs field
* in blkif_request_indirect should be filled by the frontend with the * in blkif_request_indirect should be filled by the frontend with the
* grant references of the pages that are holding the indirect segments. * grant references of the pages that are holding the indirect segments.
* This pages are filled with an array of blkif_request_segment_aligned * These pages are filled with an array of blkif_request_segment that hold the
* that hold the information about the segments. The number of indirect * information about the segments. The number of indirect pages to use is
* pages to use is determined by the maximum number of segments * determined by the number of segments an indirect request contains. Every
* a indirect request contains. Every indirect page can contain a maximum * indirect page can contain a maximum of
* of 512 segments (PAGE_SIZE/sizeof(blkif_request_segment_aligned)), * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
* so to calculate the number of indirect pages to use we have to do * calculate the number of indirect pages to use we have to do
* ceil(indirect_segments/512). * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
* *
* If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
* create the "feature-max-indirect-segments" node! * create the "feature-max-indirect-segments" node!
...@@ -135,13 +135,12 @@ typedef uint64_t blkif_sector_t; ...@@ -135,13 +135,12 @@ typedef uint64_t blkif_sector_t;
#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 #define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
struct blkif_request_segment_aligned { struct blkif_request_segment {
grant_ref_t gref; /* reference to I/O buffer frame */ grant_ref_t gref; /* reference to I/O buffer frame */
/* @first_sect: first sector in frame to transfer (inclusive). */ /* @first_sect: first sector in frame to transfer (inclusive). */
/* @last_sect: last sector in frame to transfer (inclusive). */ /* @last_sect: last sector in frame to transfer (inclusive). */
uint8_t first_sect, last_sect; uint8_t first_sect, last_sect;
uint16_t _pad; /* padding to make it 8 bytes, so it's cache-aligned */ };
} __attribute__((__packed__));
struct blkif_request_rw { struct blkif_request_rw {
uint8_t nr_segments; /* number of segments */ uint8_t nr_segments; /* number of segments */
...@@ -151,12 +150,7 @@ struct blkif_request_rw { ...@@ -151,12 +150,7 @@ struct blkif_request_rw {
#endif #endif
uint64_t id; /* private guest value, echoed in resp */ uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment { struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
grant_ref_t gref; /* reference to I/O buffer frame */
/* @first_sect: first sector in frame to transfer (inclusive). */
/* @last_sect: last sector in frame to transfer (inclusive). */
uint8_t first_sect, last_sect;
} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} __attribute__((__packed__)); } __attribute__((__packed__));
struct blkif_request_discard { struct blkif_request_discard {
......
...@@ -54,9 +54,7 @@ static inline void move_tags(unsigned *dst, unsigned *dst_nr, ...@@ -54,9 +54,7 @@ static inline void move_tags(unsigned *dst, unsigned *dst_nr,
/* /*
* Try to steal tags from a remote cpu's percpu freelist. * Try to steal tags from a remote cpu's percpu freelist.
* *
* We first check how many percpu freelists have tags - we don't steal tags * We first check how many percpu freelists have tags
* unless enough percpu freelists have tags on them that it's possible more than
* half the total tags could be stuck on remote percpu freelists.
* *
* Then we iterate through the cpus until we find some tags - we don't attempt * Then we iterate through the cpus until we find some tags - we don't attempt
* to find the "best" cpu to steal from, to keep cacheline bouncing to a * to find the "best" cpu to steal from, to keep cacheline bouncing to a
...@@ -69,8 +67,7 @@ static inline void steal_tags(struct percpu_ida *pool, ...@@ -69,8 +67,7 @@ static inline void steal_tags(struct percpu_ida *pool,
struct percpu_ida_cpu *remote; struct percpu_ida_cpu *remote;
for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags);
cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2; cpus_have_tags; cpus_have_tags--) {
cpus_have_tags--) {
cpu = cpumask_next(cpu, &pool->cpus_have_tags); cpu = cpumask_next(cpu, &pool->cpus_have_tags);
if (cpu >= nr_cpu_ids) { if (cpu >= nr_cpu_ids) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment