Commit f1d70248 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "A smaller collection of fixes for the block core that would be nice to
  have in -rc2.  This pull request contains:

   - Fixes for races in the wait/wakeup logic used in blk-mq from
     Alexander.  No issues have been observed, but it is definitely a
     bit flakey currently.  Alternatively, we may drop the cyclic
     wakeups going forward, but that needs more testing.

   - Some cleanups from Christoph.

   - Fix for an oops in null_blk if queue_mode=1 and softirq completions
     are used.  From me.

   - A fix for a regression caused by the chunk size setting.  It
     inadvertently used max_hw_sectors instead of max_sectors, which is
     incorrect, and causes hangs on btrfs multi-disk setups (where hw
     sectors apparently isn't set).  From me.

   - Removal of WQ_POWER_EFFICIENT in the kblockd creation.  This was a
     recent addition as well, but it actually breaks blk-mq which relies
     on strict scheduling.  If the workqueue power_efficient mode is
     turned on, this breaks blk-mq.  From Matias.

   - null_blk module parameter description fix from Mike"

* 'for-linus' of git://git.kernel.dk/linux-block:
  blk-mq: bitmap tag: fix races in bt_get() function
  blk-mq: bitmap tag: fix race on blk_mq_bitmap_tags::wake_cnt
  blk-mq: bitmap tag: fix races on shared ::wake_index fields
  block: blk_max_size_offset() should check ->max_sectors
  null_blk: fix softirq completions for queue_mode == 1
  blk-mq: merge blk_mq_drain_queue and __blk_mq_drain_queue
  blk-mq: properly drain stopped queues
  block: remove WQ_POWER_EFFICIENT from kblockd
  null_blk: fix name and description of 'queue_mode' module parameter
  block: remove elv_abort_queue and blk_abort_flushes
parents 58c72f94 86fb5c56
...@@ -3312,8 +3312,7 @@ int __init blk_dev_init(void) ...@@ -3312,8 +3312,7 @@ int __init blk_dev_init(void)
/* used for unplugging and affects IO latency/throughput - HIGHPRI */ /* used for unplugging and affects IO latency/throughput - HIGHPRI */
kblockd_workqueue = alloc_workqueue("kblockd", kblockd_workqueue = alloc_workqueue("kblockd",
WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
WQ_POWER_EFFICIENT, 0);
if (!kblockd_workqueue) if (!kblockd_workqueue)
panic("Failed to create kblockd\n"); panic("Failed to create kblockd\n");
......
...@@ -421,44 +421,6 @@ void blk_insert_flush(struct request *rq) ...@@ -421,44 +421,6 @@ void blk_insert_flush(struct request *rq)
blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
} }
/**
* blk_abort_flushes - @q is being aborted, abort flush requests
* @q: request_queue being aborted
*
* To be called from elv_abort_queue(). @q is being aborted. Prepare all
* FLUSH/FUA requests for abortion.
*
* CONTEXT:
* spin_lock_irq(q->queue_lock)
*/
void blk_abort_flushes(struct request_queue *q)
{
struct request *rq, *n;
int i;
/*
* Requests in flight for data are already owned by the dispatch
* queue or the device driver. Just restore for normal completion.
*/
list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) {
list_del_init(&rq->flush.list);
blk_flush_restore_request(rq);
}
/*
* We need to give away requests on flush queues. Restore for
* normal completion and put them on the dispatch queue.
*/
for (i = 0; i < ARRAY_SIZE(q->flush_queue); i++) {
list_for_each_entry_safe(rq, n, &q->flush_queue[i],
flush.list) {
list_del_init(&rq->flush.list);
blk_flush_restore_request(rq);
list_add_tail(&rq->queuelist, &q->queue_head);
}
}
}
/** /**
* blkdev_issue_flush - queue a flush * blkdev_issue_flush - queue a flush
* @bdev: blockdev to issue flush for * @bdev: blockdev to issue flush for
......
...@@ -43,9 +43,16 @@ bool blk_mq_has_free_tags(struct blk_mq_tags *tags) ...@@ -43,9 +43,16 @@ bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
return bt_has_free_tags(&tags->bitmap_tags); return bt_has_free_tags(&tags->bitmap_tags);
} }
static inline void bt_index_inc(unsigned int *index) static inline int bt_index_inc(int index)
{ {
*index = (*index + 1) & (BT_WAIT_QUEUES - 1); return (index + 1) & (BT_WAIT_QUEUES - 1);
}
static inline void bt_index_atomic_inc(atomic_t *index)
{
int old = atomic_read(index);
int new = bt_index_inc(old);
atomic_cmpxchg(index, old, new);
} }
/* /*
...@@ -69,14 +76,14 @@ static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags) ...@@ -69,14 +76,14 @@ static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
int i, wake_index; int i, wake_index;
bt = &tags->bitmap_tags; bt = &tags->bitmap_tags;
wake_index = bt->wake_index; wake_index = atomic_read(&bt->wake_index);
for (i = 0; i < BT_WAIT_QUEUES; i++) { for (i = 0; i < BT_WAIT_QUEUES; i++) {
struct bt_wait_state *bs = &bt->bs[wake_index]; struct bt_wait_state *bs = &bt->bs[wake_index];
if (waitqueue_active(&bs->wait)) if (waitqueue_active(&bs->wait))
wake_up(&bs->wait); wake_up(&bs->wait);
bt_index_inc(&wake_index); wake_index = bt_index_inc(wake_index);
} }
} }
...@@ -212,12 +219,14 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt, ...@@ -212,12 +219,14 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
struct blk_mq_hw_ctx *hctx) struct blk_mq_hw_ctx *hctx)
{ {
struct bt_wait_state *bs; struct bt_wait_state *bs;
int wait_index;
if (!hctx) if (!hctx)
return &bt->bs[0]; return &bt->bs[0];
bs = &bt->bs[hctx->wait_index]; wait_index = atomic_read(&hctx->wait_index);
bt_index_inc(&hctx->wait_index); bs = &bt->bs[wait_index];
bt_index_atomic_inc(&hctx->wait_index);
return bs; return bs;
} }
...@@ -239,18 +248,12 @@ static int bt_get(struct blk_mq_alloc_data *data, ...@@ -239,18 +248,12 @@ static int bt_get(struct blk_mq_alloc_data *data,
bs = bt_wait_ptr(bt, hctx); bs = bt_wait_ptr(bt, hctx);
do { do {
bool was_empty;
was_empty = list_empty(&wait.task_list);
prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE); prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
tag = __bt_get(hctx, bt, last_tag); tag = __bt_get(hctx, bt, last_tag);
if (tag != -1) if (tag != -1)
break; break;
if (was_empty)
atomic_set(&bs->wait_cnt, bt->wake_cnt);
blk_mq_put_ctx(data->ctx); blk_mq_put_ctx(data->ctx);
io_schedule(); io_schedule();
...@@ -313,18 +316,19 @@ static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt) ...@@ -313,18 +316,19 @@ static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
{ {
int i, wake_index; int i, wake_index;
wake_index = bt->wake_index; wake_index = atomic_read(&bt->wake_index);
for (i = 0; i < BT_WAIT_QUEUES; i++) { for (i = 0; i < BT_WAIT_QUEUES; i++) {
struct bt_wait_state *bs = &bt->bs[wake_index]; struct bt_wait_state *bs = &bt->bs[wake_index];
if (waitqueue_active(&bs->wait)) { if (waitqueue_active(&bs->wait)) {
if (wake_index != bt->wake_index) int o = atomic_read(&bt->wake_index);
bt->wake_index = wake_index; if (wake_index != o)
atomic_cmpxchg(&bt->wake_index, o, wake_index);
return bs; return bs;
} }
bt_index_inc(&wake_index); wake_index = bt_index_inc(wake_index);
} }
return NULL; return NULL;
...@@ -334,6 +338,7 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag) ...@@ -334,6 +338,7 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
{ {
const int index = TAG_TO_INDEX(bt, tag); const int index = TAG_TO_INDEX(bt, tag);
struct bt_wait_state *bs; struct bt_wait_state *bs;
int wait_cnt;
/* /*
* The unlock memory barrier need to order access to req in free * The unlock memory barrier need to order access to req in free
...@@ -342,10 +347,19 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag) ...@@ -342,10 +347,19 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word); clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word);
bs = bt_wake_ptr(bt); bs = bt_wake_ptr(bt);
if (bs && atomic_dec_and_test(&bs->wait_cnt)) { if (!bs)
atomic_set(&bs->wait_cnt, bt->wake_cnt); return;
bt_index_inc(&bt->wake_index);
wait_cnt = atomic_dec_return(&bs->wait_cnt);
if (wait_cnt == 0) {
wake:
atomic_add(bt->wake_cnt, &bs->wait_cnt);
bt_index_atomic_inc(&bt->wake_index);
wake_up(&bs->wait); wake_up(&bs->wait);
} else if (wait_cnt < 0) {
wait_cnt = atomic_inc_return(&bs->wait_cnt);
if (!wait_cnt)
goto wake;
} }
} }
...@@ -499,10 +513,13 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, ...@@ -499,10 +513,13 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
return -ENOMEM; return -ENOMEM;
} }
for (i = 0; i < BT_WAIT_QUEUES; i++) bt_update_count(bt, depth);
for (i = 0; i < BT_WAIT_QUEUES; i++) {
init_waitqueue_head(&bt->bs[i].wait); init_waitqueue_head(&bt->bs[i].wait);
atomic_set(&bt->bs[i].wait_cnt, bt->wake_cnt);
}
bt_update_count(bt, depth);
return 0; return 0;
} }
......
...@@ -24,7 +24,7 @@ struct blk_mq_bitmap_tags { ...@@ -24,7 +24,7 @@ struct blk_mq_bitmap_tags {
unsigned int map_nr; unsigned int map_nr;
struct blk_align_bitmap *map; struct blk_align_bitmap *map;
unsigned int wake_index; atomic_t wake_index;
struct bt_wait_state *bs; struct bt_wait_state *bs;
}; };
......
...@@ -109,7 +109,7 @@ static void blk_mq_queue_exit(struct request_queue *q) ...@@ -109,7 +109,7 @@ static void blk_mq_queue_exit(struct request_queue *q)
__percpu_counter_add(&q->mq_usage_counter, -1, 1000000); __percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
} }
static void __blk_mq_drain_queue(struct request_queue *q) void blk_mq_drain_queue(struct request_queue *q)
{ {
while (true) { while (true) {
s64 count; s64 count;
...@@ -120,7 +120,7 @@ static void __blk_mq_drain_queue(struct request_queue *q) ...@@ -120,7 +120,7 @@ static void __blk_mq_drain_queue(struct request_queue *q)
if (count == 0) if (count == 0)
break; break;
blk_mq_run_queues(q, false); blk_mq_start_hw_queues(q);
msleep(10); msleep(10);
} }
} }
...@@ -139,12 +139,7 @@ static void blk_mq_freeze_queue(struct request_queue *q) ...@@ -139,12 +139,7 @@ static void blk_mq_freeze_queue(struct request_queue *q)
spin_unlock_irq(q->queue_lock); spin_unlock_irq(q->queue_lock);
if (drain) if (drain)
__blk_mq_drain_queue(q); blk_mq_drain_queue(q);
}
void blk_mq_drain_queue(struct request_queue *q)
{
__blk_mq_drain_queue(q);
} }
static void blk_mq_unfreeze_queue(struct request_queue *q) static void blk_mq_unfreeze_queue(struct request_queue *q)
......
...@@ -84,7 +84,6 @@ static inline void blk_clear_rq_complete(struct request *rq) ...@@ -84,7 +84,6 @@ static inline void blk_clear_rq_complete(struct request *rq)
#define ELV_ON_HASH(rq) ((rq)->cmd_flags & REQ_HASHED) #define ELV_ON_HASH(rq) ((rq)->cmd_flags & REQ_HASHED)
void blk_insert_flush(struct request *rq); void blk_insert_flush(struct request *rq);
void blk_abort_flushes(struct request_queue *q);
static inline struct request *__elv_next_request(struct request_queue *q) static inline struct request *__elv_next_request(struct request_queue *q)
{ {
......
...@@ -729,26 +729,6 @@ int elv_may_queue(struct request_queue *q, int rw) ...@@ -729,26 +729,6 @@ int elv_may_queue(struct request_queue *q, int rw)
return ELV_MQUEUE_MAY; return ELV_MQUEUE_MAY;
} }
void elv_abort_queue(struct request_queue *q)
{
struct request *rq;
blk_abort_flushes(q);
while (!list_empty(&q->queue_head)) {
rq = list_entry_rq(q->queue_head.next);
rq->cmd_flags |= REQ_QUIET;
trace_block_rq_abort(q, rq);
/*
* Mark this request as started so we don't trigger
* any debug logic in the end I/O path.
*/
blk_start_request(rq);
__blk_end_request_all(rq, -EIO);
}
}
EXPORT_SYMBOL(elv_abort_queue);
void elv_completed_request(struct request_queue *q, struct request *rq) void elv_completed_request(struct request_queue *q, struct request *rq)
{ {
struct elevator_queue *e = q->elevator; struct elevator_queue *e = q->elevator;
......
...@@ -79,7 +79,7 @@ MODULE_PARM_DESC(home_node, "Home node for the device"); ...@@ -79,7 +79,7 @@ MODULE_PARM_DESC(home_node, "Home node for the device");
static int queue_mode = NULL_Q_MQ; static int queue_mode = NULL_Q_MQ;
module_param(queue_mode, int, S_IRUGO); module_param(queue_mode, int, S_IRUGO);
MODULE_PARM_DESC(use_mq, "Use blk-mq interface (0=bio,1=rq,2=multiqueue)"); MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
static int gb = 250; static int gb = 250;
module_param(gb, int, S_IRUGO); module_param(gb, int, S_IRUGO);
...@@ -227,7 +227,10 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd) ...@@ -227,7 +227,10 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
static void null_softirq_done_fn(struct request *rq) static void null_softirq_done_fn(struct request *rq)
{ {
end_cmd(blk_mq_rq_to_pdu(rq)); if (queue_mode == NULL_Q_MQ)
end_cmd(blk_mq_rq_to_pdu(rq));
else
end_cmd(rq->special);
} }
static inline void null_handle_cmd(struct nullb_cmd *cmd) static inline void null_handle_cmd(struct nullb_cmd *cmd)
......
...@@ -42,7 +42,7 @@ struct blk_mq_hw_ctx { ...@@ -42,7 +42,7 @@ struct blk_mq_hw_ctx {
unsigned int nr_ctx; unsigned int nr_ctx;
struct blk_mq_ctx **ctxs; struct blk_mq_ctx **ctxs;
unsigned int wait_index; atomic_t wait_index;
struct blk_mq_tags *tags; struct blk_mq_tags *tags;
......
...@@ -920,7 +920,7 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q, ...@@ -920,7 +920,7 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q,
sector_t offset) sector_t offset)
{ {
if (!q->limits.chunk_sectors) if (!q->limits.chunk_sectors)
return q->limits.max_hw_sectors; return q->limits.max_sectors;
return q->limits.chunk_sectors - return q->limits.chunk_sectors -
(offset & (q->limits.chunk_sectors - 1)); (offset & (q->limits.chunk_sectors - 1));
......
...@@ -133,7 +133,6 @@ extern struct request *elv_latter_request(struct request_queue *, struct request ...@@ -133,7 +133,6 @@ extern struct request *elv_latter_request(struct request_queue *, struct request
extern int elv_register_queue(struct request_queue *q); extern int elv_register_queue(struct request_queue *q);
extern void elv_unregister_queue(struct request_queue *q); extern void elv_unregister_queue(struct request_queue *q);
extern int elv_may_queue(struct request_queue *, int); extern int elv_may_queue(struct request_queue *, int);
extern void elv_abort_queue(struct request_queue *);
extern void elv_completed_request(struct request_queue *, struct request *); extern void elv_completed_request(struct request_queue *, struct request *);
extern int elv_set_request(struct request_queue *q, struct request *rq, extern int elv_set_request(struct request_queue *q, struct request *rq,
struct bio *bio, gfp_t gfp_mask); struct bio *bio, gfp_t gfp_mask);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment