Commit 8d829b9b authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "This contains a set of fixes for xen-blkback by way of Konrad, and a
  performance regression fix for blk-mq for shared tags.

  The latter could account for as much as a 50x reduction in
  performance, with the test case from the user with 500 name spaces. A
  more realistic setup on my end with 32 drives showed a 3.5x drop. The
  fix has been thoroughly tested before being committed"

* 'for-linus' of git://git.kernel.dk/linux-block:
  blk-mq: fix performance regression with shared tags
  xen-blkback: don't leak stack data via response ring
  xen/blkback: don't use xen_blkif_get() in xen-blkback kthread
  xen/blkback: don't free be structure too early
  xen/blkback: fix disconnect while I/Os in flight
parents 48b6bbef 8e8320c9
...@@ -68,6 +68,45 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q, ...@@ -68,6 +68,45 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q,
__blk_mq_sched_assign_ioc(q, rq, bio, ioc); __blk_mq_sched_assign_ioc(q, rq, bio, ioc);
} }
/*
* Mark a hardware queue as needing a restart. For shared queues, maintain
* a count of how many hardware queues are marked for restart.
*/
static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
return;
if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
struct request_queue *q = hctx->queue;
if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
atomic_inc(&q->shared_hctx_restart);
} else
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
return false;
if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
struct request_queue *q = hctx->queue;
if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
atomic_dec(&q->shared_hctx_restart);
} else
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
if (blk_mq_hctx_has_pending(hctx)) {
blk_mq_run_hw_queue(hctx, true);
return true;
}
return false;
}
struct request *blk_mq_sched_get_request(struct request_queue *q, struct request *blk_mq_sched_get_request(struct request_queue *q,
struct bio *bio, struct bio *bio,
unsigned int op, unsigned int op,
...@@ -266,18 +305,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, ...@@ -266,18 +305,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
return true; return true;
} }
static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
if (blk_mq_hctx_has_pending(hctx)) {
blk_mq_run_hw_queue(hctx, true);
return true;
}
}
return false;
}
/** /**
* list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
* @pos: loop cursor. * @pos: loop cursor.
...@@ -309,6 +336,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx) ...@@ -309,6 +336,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
unsigned int i, j; unsigned int i, j;
if (set->flags & BLK_MQ_F_TAG_SHARED) { if (set->flags & BLK_MQ_F_TAG_SHARED) {
/*
* If this is 0, then we know that no hardware queues
* have RESTART marked. We're done.
*/
if (!atomic_read(&queue->shared_hctx_restart))
return;
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu_rr(q, queue, &set->tag_list, list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
tag_set_list) { tag_set_list) {
......
...@@ -115,15 +115,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx) ...@@ -115,15 +115,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
return false; return false;
} }
/*
* Mark a hardware queue as needing a restart.
*/
static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}
static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx) static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
{ {
return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
......
...@@ -2103,20 +2103,30 @@ static void blk_mq_map_swqueue(struct request_queue *q, ...@@ -2103,20 +2103,30 @@ static void blk_mq_map_swqueue(struct request_queue *q,
} }
} }
/*
* Caller needs to ensure that we're either frozen/quiesced, or that
* the queue isn't live yet.
*/
static void queue_set_hctx_shared(struct request_queue *q, bool shared) static void queue_set_hctx_shared(struct request_queue *q, bool shared)
{ {
struct blk_mq_hw_ctx *hctx; struct blk_mq_hw_ctx *hctx;
int i; int i;
queue_for_each_hw_ctx(q, hctx, i) { queue_for_each_hw_ctx(q, hctx, i) {
if (shared) if (shared) {
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
atomic_inc(&q->shared_hctx_restart);
hctx->flags |= BLK_MQ_F_TAG_SHARED; hctx->flags |= BLK_MQ_F_TAG_SHARED;
else } else {
if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
atomic_dec(&q->shared_hctx_restart);
hctx->flags &= ~BLK_MQ_F_TAG_SHARED; hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
}
} }
} }
static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared) static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
bool shared)
{ {
struct request_queue *q; struct request_queue *q;
......
...@@ -609,8 +609,6 @@ int xen_blkif_schedule(void *arg) ...@@ -609,8 +609,6 @@ int xen_blkif_schedule(void *arg)
unsigned long timeout; unsigned long timeout;
int ret; int ret;
xen_blkif_get(blkif);
set_freezable(); set_freezable();
while (!kthread_should_stop()) { while (!kthread_should_stop()) {
if (try_to_freeze()) if (try_to_freeze())
...@@ -665,7 +663,6 @@ int xen_blkif_schedule(void *arg) ...@@ -665,7 +663,6 @@ int xen_blkif_schedule(void *arg)
print_stats(ring); print_stats(ring);
ring->xenblkd = NULL; ring->xenblkd = NULL;
xen_blkif_put(blkif);
return 0; return 0;
} }
...@@ -1436,34 +1433,35 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, ...@@ -1436,34 +1433,35 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
static void make_response(struct xen_blkif_ring *ring, u64 id, static void make_response(struct xen_blkif_ring *ring, u64 id,
unsigned short op, int st) unsigned short op, int st)
{ {
struct blkif_response resp; struct blkif_response *resp;
unsigned long flags; unsigned long flags;
union blkif_back_rings *blk_rings; union blkif_back_rings *blk_rings;
int notify; int notify;
resp.id = id;
resp.operation = op;
resp.status = st;
spin_lock_irqsave(&ring->blk_ring_lock, flags); spin_lock_irqsave(&ring->blk_ring_lock, flags);
blk_rings = &ring->blk_rings; blk_rings = &ring->blk_rings;
/* Place on the response ring for the relevant domain. */ /* Place on the response ring for the relevant domain. */
switch (ring->blkif->blk_protocol) { switch (ring->blkif->blk_protocol) {
case BLKIF_PROTOCOL_NATIVE: case BLKIF_PROTOCOL_NATIVE:
memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), resp = RING_GET_RESPONSE(&blk_rings->native,
&resp, sizeof(resp)); blk_rings->native.rsp_prod_pvt);
break; break;
case BLKIF_PROTOCOL_X86_32: case BLKIF_PROTOCOL_X86_32:
memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), resp = RING_GET_RESPONSE(&blk_rings->x86_32,
&resp, sizeof(resp)); blk_rings->x86_32.rsp_prod_pvt);
break; break;
case BLKIF_PROTOCOL_X86_64: case BLKIF_PROTOCOL_X86_64:
memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), resp = RING_GET_RESPONSE(&blk_rings->x86_64,
&resp, sizeof(resp)); blk_rings->x86_64.rsp_prod_pvt);
break; break;
default: default:
BUG(); BUG();
} }
resp->id = id;
resp->operation = op;
resp->status = st;
blk_rings->common.rsp_prod_pvt++; blk_rings->common.rsp_prod_pvt++;
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
spin_unlock_irqrestore(&ring->blk_ring_lock, flags); spin_unlock_irqrestore(&ring->blk_ring_lock, flags);
......
...@@ -75,9 +75,8 @@ extern unsigned int xenblk_max_queues; ...@@ -75,9 +75,8 @@ extern unsigned int xenblk_max_queues;
struct blkif_common_request { struct blkif_common_request {
char dummy; char dummy;
}; };
struct blkif_common_response {
char dummy; /* i386 protocol version */
};
struct blkif_x86_32_request_rw { struct blkif_x86_32_request_rw {
uint8_t nr_segments; /* number of segments */ uint8_t nr_segments; /* number of segments */
...@@ -129,14 +128,6 @@ struct blkif_x86_32_request { ...@@ -129,14 +128,6 @@ struct blkif_x86_32_request {
} u; } u;
} __attribute__((__packed__)); } __attribute__((__packed__));
/* i386 protocol version */
#pragma pack(push, 4)
struct blkif_x86_32_response {
uint64_t id; /* copied from request */
uint8_t operation; /* copied from request */
int16_t status; /* BLKIF_RSP_??? */
};
#pragma pack(pop)
/* x86_64 protocol version */ /* x86_64 protocol version */
struct blkif_x86_64_request_rw { struct blkif_x86_64_request_rw {
...@@ -193,18 +184,12 @@ struct blkif_x86_64_request { ...@@ -193,18 +184,12 @@ struct blkif_x86_64_request {
} u; } u;
} __attribute__((__packed__)); } __attribute__((__packed__));
struct blkif_x86_64_response {
uint64_t __attribute__((__aligned__(8))) id;
uint8_t operation; /* copied from request */
int16_t status; /* BLKIF_RSP_??? */
};
DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, DEFINE_RING_TYPES(blkif_common, struct blkif_common_request,
struct blkif_common_response); struct blkif_response);
DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request,
struct blkif_x86_32_response); struct blkif_response __packed);
DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request,
struct blkif_x86_64_response); struct blkif_response);
union blkif_back_rings { union blkif_back_rings {
struct blkif_back_ring native; struct blkif_back_ring native;
...@@ -281,6 +266,7 @@ struct xen_blkif_ring { ...@@ -281,6 +266,7 @@ struct xen_blkif_ring {
wait_queue_head_t wq; wait_queue_head_t wq;
atomic_t inflight; atomic_t inflight;
bool active;
/* One thread per blkif ring. */ /* One thread per blkif ring. */
struct task_struct *xenblkd; struct task_struct *xenblkd;
unsigned int waiting_reqs; unsigned int waiting_reqs;
......
...@@ -159,7 +159,7 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif) ...@@ -159,7 +159,7 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
init_waitqueue_head(&ring->shutdown_wq); init_waitqueue_head(&ring->shutdown_wq);
ring->blkif = blkif; ring->blkif = blkif;
ring->st_print = jiffies; ring->st_print = jiffies;
xen_blkif_get(blkif); ring->active = true;
} }
return 0; return 0;
...@@ -249,10 +249,12 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) ...@@ -249,10 +249,12 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
struct xen_blkif_ring *ring = &blkif->rings[r]; struct xen_blkif_ring *ring = &blkif->rings[r];
unsigned int i = 0; unsigned int i = 0;
if (!ring->active)
continue;
if (ring->xenblkd) { if (ring->xenblkd) {
kthread_stop(ring->xenblkd); kthread_stop(ring->xenblkd);
wake_up(&ring->shutdown_wq); wake_up(&ring->shutdown_wq);
ring->xenblkd = NULL;
} }
/* The above kthread_stop() guarantees that at this point we /* The above kthread_stop() guarantees that at this point we
...@@ -296,7 +298,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) ...@@ -296,7 +298,7 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
BUG_ON(ring->free_pages_num != 0); BUG_ON(ring->free_pages_num != 0);
BUG_ON(ring->persistent_gnt_c != 0); BUG_ON(ring->persistent_gnt_c != 0);
WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages)); WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
xen_blkif_put(blkif); ring->active = false;
} }
blkif->nr_ring_pages = 0; blkif->nr_ring_pages = 0;
/* /*
...@@ -312,9 +314,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif) ...@@ -312,9 +314,10 @@ static int xen_blkif_disconnect(struct xen_blkif *blkif)
static void xen_blkif_free(struct xen_blkif *blkif) static void xen_blkif_free(struct xen_blkif *blkif)
{ {
WARN_ON(xen_blkif_disconnect(blkif));
xen_blkif_disconnect(blkif);
xen_vbd_free(&blkif->vbd); xen_vbd_free(&blkif->vbd);
kfree(blkif->be->mode);
kfree(blkif->be);
/* Make sure everything is drained before shutting down */ /* Make sure everything is drained before shutting down */
kmem_cache_free(xen_blkif_cachep, blkif); kmem_cache_free(xen_blkif_cachep, blkif);
...@@ -511,8 +514,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev) ...@@ -511,8 +514,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
xen_blkif_put(be->blkif); xen_blkif_put(be->blkif);
} }
kfree(be->mode);
kfree(be);
return 0; return 0;
} }
......
...@@ -391,6 +391,8 @@ struct request_queue { ...@@ -391,6 +391,8 @@ struct request_queue {
int nr_rqs[2]; /* # allocated [a]sync rqs */ int nr_rqs[2]; /* # allocated [a]sync rqs */
int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
atomic_t shared_hctx_restart;
struct blk_queue_stats *stats; struct blk_queue_stats *stats;
struct rq_wb *rq_wb; struct rq_wb *rq_wb;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment