Commit 1daac193 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "A collection of fixes since the merge window;

   - fix for a double elevator module release, from Chao Yu.  Ancient bug.

   - the splice() MORE flag fix from Christophe Leroy.

   - a fix for NVMe, correcting a patch that went in during the merge
     window.  From Keith.

   - two fixes for blk-mq CPU hotplug handling, from Ming Lei.

   - bdi vs blockdev lifetime fix from Neil Brown, fixing an oops in md.

   - two blk-mq fixes from Shaohua, fixing a race on queue stop and a
     bad merge issue with FUA writes.

   - division-by-zero fix for writeback from Tejun.

   - a block bounce page accounting fix, making sure we inc/dec after
     bouncing so that pre/post IO pages match up.  From Wang YanQing"

* 'for-linus' of git://git.kernel.dk/linux-block:
  splice: sendfile() at once fails for big files
  blk-mq: don't lose requests if a stopped queue restarts
  blk-mq: fix FUA request hang
  block: destroy bdi before blockdev is unregistered.
  block:bounce: fix call inc_|dec_zone_page_state on different pages confuse value of NR_BOUNCE
  elevator: fix double release of elevator module
  writeback: use |1 instead of +1 to protect against div by zero
  blk-mq: fix CPU hotplug handling
  blk-mq: fix race between timeout and CPU hotplug
  NVMe: Fix VPD B0 max sectors translation
parents 41c64bb1 0ff28d9f
@@ -552,6 +552,8 @@ void blk_cleanup_queue(struct request_queue *q)
 		q->queue_lock = &q->__queue_lock;
 	spin_unlock_irq(lock);
 
+	bdi_destroy(&q->backing_dev_info);
+
 	/* @q is and will stay empty, shutdown and put */
 	blk_put_queue(q);
 }
...
@@ -677,9 +677,12 @@ static void blk_mq_rq_timer(unsigned long priv)
 		data.next = blk_rq_timeout(round_jiffies_up(data.next));
 		mod_timer(&q->timeout, data.next);
 	} else {
-		queue_for_each_hw_ctx(q, hctx, i)
-			blk_mq_tag_idle(hctx);
+		queue_for_each_hw_ctx(q, hctx, i) {
+			/* the hctx may be unmapped, so check it here */
+			if (blk_mq_hw_queue_mapped(hctx))
+				blk_mq_tag_idle(hctx);
+		}
 	}
 }
 
 /*
@@ -855,6 +858,16 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 		spin_lock(&hctx->lock);
 		list_splice(&rq_list, &hctx->dispatch);
 		spin_unlock(&hctx->lock);
+		/*
+		 * the queue is expected stopped with BLK_MQ_RQ_QUEUE_BUSY, but
+		 * it's possible the queue is stopped and restarted again
+		 * before this. Queue restart will dispatch requests. And since
+		 * requests in rq_list aren't added into hctx->dispatch yet,
+		 * the requests in rq_list might get lost.
+		 *
+		 * blk_mq_run_hw_queue() already checks the STOPPED bit
+		 **/
+		blk_mq_run_hw_queue(hctx, true);
 	}
 }
 
@@ -1571,22 +1584,6 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
 	return NOTIFY_OK;
 }
 
-static int blk_mq_hctx_cpu_online(struct blk_mq_hw_ctx *hctx, int cpu)
-{
-	struct request_queue *q = hctx->queue;
-	struct blk_mq_tag_set *set = q->tag_set;
-
-	if (set->tags[hctx->queue_num])
-		return NOTIFY_OK;
-
-	set->tags[hctx->queue_num] = blk_mq_init_rq_map(set, hctx->queue_num);
-	if (!set->tags[hctx->queue_num])
-		return NOTIFY_STOP;
-
-	hctx->tags = set->tags[hctx->queue_num];
-	return NOTIFY_OK;
-}
-
 static int blk_mq_hctx_notify(void *data, unsigned long action,
 			      unsigned int cpu)
 {
@@ -1594,8 +1591,11 @@ static int blk_mq_hctx_notify(void *data, unsigned long action,
 
 	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
 		return blk_mq_hctx_cpu_offline(hctx, cpu);
-	else if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
-		return blk_mq_hctx_cpu_online(hctx, cpu);
+
+	/*
+	 * In case of CPU online, tags may be reallocated
+	 * in blk_mq_map_swqueue() after mapping is updated.
+	 */
 
 	return NOTIFY_OK;
 }
@@ -1775,6 +1775,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	unsigned int i;
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
+	struct blk_mq_tag_set *set = q->tag_set;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		cpumask_clear(hctx->cpumask);
@@ -1803,16 +1804,20 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 		 * disable it and free the request entries.
 		 */
 		if (!hctx->nr_ctx) {
-			struct blk_mq_tag_set *set = q->tag_set;
-
 			if (set->tags[i]) {
 				blk_mq_free_rq_map(set, set->tags[i], i);
 				set->tags[i] = NULL;
-				hctx->tags = NULL;
 			}
+			hctx->tags = NULL;
 			continue;
 		}
 
+		/* unmapped hw queue can be remapped after CPU topo changed */
+		if (!set->tags[i])
+			set->tags[i] = blk_mq_init_rq_map(set, i);
+		hctx->tags = set->tags[i];
+		WARN_ON(!hctx->tags);
+
 		/*
 		 * Set the map size to the number of mapped software queues.
 		 * This is more accurate and more efficient than looping
@@ -2090,9 +2095,16 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 	 */
 	list_for_each_entry(q, &all_q_list, all_q_node)
 		blk_mq_freeze_queue_start(q);
-	list_for_each_entry(q, &all_q_list, all_q_node)
+	list_for_each_entry(q, &all_q_list, all_q_node) {
 		blk_mq_freeze_queue_wait(q);
 
+		/*
+		 * timeout handler can't touch hw queue during the
+		 * reinitialization
+		 */
+		del_timer_sync(&q->timeout);
+	}
+
 	list_for_each_entry(q, &all_q_list, all_q_node)
 		blk_mq_queue_reinit(q);
...
@@ -522,8 +522,6 @@ static void blk_release_queue(struct kobject *kobj)
 
 	blk_trace_shutdown(q);
 
-	bdi_destroy(&q->backing_dev_info);
-
 	ida_simple_remove(&blk_queue_ida, q->id);
 	call_rcu(&q->rcu_head, blk_free_queue_rcu);
 }
...
@@ -221,8 +221,8 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
 		if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
 			continue;
 
-		inc_zone_page_state(to->bv_page, NR_BOUNCE);
 		to->bv_page = mempool_alloc(pool, q->bounce_gfp);
+		inc_zone_page_state(to->bv_page, NR_BOUNCE);
 
 		if (rw == WRITE) {
 			char *vto, *vfrom;
...
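The ordering matters because NR_BOUNCE is a per-zone counter charged against whatever page to->bv_page points at: the old code incremented it on the original page, while bounce completion decrements it on the bounce page, so the two never cancel. A minimal user-space sketch of that mismatch, with a hypothetical zone_of() and integer page ids standing in for the kernel's per-zone vmstat counters:

#include <assert.h>
#include <stdio.h>

/* Toy model: one counter per "zone", standing in for the NR_BOUNCE vmstat. */
static int nr_bounce[2];

/* Hypothetical mapping: page 0 lives in zone 0, page 1 (the bounce page) in zone 1. */
static int zone_of(int page) { return page; }

int main(void)
{
	int orig_page = 0, bounce_page = 1;
	int bv_page = orig_page;

	/* Buggy order: charge the counter, then swap in the bounce page. */
	nr_bounce[zone_of(bv_page)]++;	/* increments zone 0 (original page)     */
	bv_page = bounce_page;		/* the bvec now points at the bounce page */
	nr_bounce[zone_of(bv_page)]--;	/* completion decrements zone 1          */
	printf("buggy: zone0=%d zone1=%d\n", nr_bounce[0], nr_bounce[1]);  /* 1 / -1 */

	/* Fixed order: swap first, then charge the same page that will be
	 * decremented at completion, so the pair cancels out exactly. */
	nr_bounce[0] = nr_bounce[1] = 0;
	bv_page = bounce_page;
	nr_bounce[zone_of(bv_page)]++;
	nr_bounce[zone_of(bv_page)]--;
	assert(nr_bounce[0] == 0 && nr_bounce[1] == 0);
	printf("fixed: zone0=%d zone1=%d\n", nr_bounce[0], nr_bounce[1]);
	return 0;
}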
@@ -157,7 +157,7 @@ struct elevator_queue *elevator_alloc(struct request_queue *q,
 
 	eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
 	if (unlikely(!eq))
-		goto err;
+		return NULL;
 
 	eq->type = e;
 	kobject_init(&eq->kobj, &elv_ktype);
@@ -165,10 +165,6 @@ struct elevator_queue *elevator_alloc(struct request_queue *q,
 	hash_init(eq->hash);
 
 	return eq;
-
-err:
-	kfree(eq);
-	elevator_put(e);
-	return NULL;
 }
 EXPORT_SYMBOL(elevator_alloc);
...
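The double release happened because the elevator type reference belongs to the caller: it is taken with elevator_get() before elevator_alloc() is reached, and the caller (e.g. elevator_init()) drops it itself when initialization fails, so the old err path inside elevator_alloc() dropping it again put the module reference twice. A minimal sketch of that ownership rule, using a plain integer in place of the module reference count (the names and the fail flag are illustrative only):

#include <assert.h>
#include <stdlib.h>

/* Toy elevator type: the integer stands in for the module reference count. */
struct elv_type { int refcount; };

static void elevator_get(struct elv_type *e) { e->refcount++; }
static void elevator_put(struct elv_type *e) { e->refcount--; }

/* Mirrors the fixed elevator_alloc(): on failure it returns NULL without
 * touching the reference, because the reference is owned by the caller.
 * The "fail" flag only exists to force the error path in this sketch. */
static void *elevator_alloc(struct elv_type *e, int fail)
{
	if (fail)
		return NULL;	/* fixed: no elevator_put(e) here */
	return malloc(16);
}

int main(void)
{
	struct elv_type e = { .refcount = 0 };

	elevator_get(&e);			/* caller takes the reference        */
	void *eq = elevator_alloc(&e, 1);	/* allocation fails                  */
	if (!eq)
		elevator_put(&e);		/* caller's error path drops it once */

	/* With the old err: path also calling elevator_put(), the count would
	 * end up at -1, i.e. the module reference released twice. */
	assert(e.refcount == 0);
	return 0;
}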
@@ -1620,8 +1620,8 @@ static int loop_add(struct loop_device **l, int i)
 
 static void loop_remove(struct loop_device *lo)
 {
-	del_gendisk(lo->lo_disk);
 	blk_cleanup_queue(lo->lo_queue);
+	del_gendisk(lo->lo_disk);
 	blk_mq_free_tag_set(&lo->tag_set);
 	put_disk(lo->lo_disk);
 	kfree(lo);
...
@@ -944,7 +944,8 @@ static int nvme_trans_ext_inq_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 static int nvme_trans_bdev_limits_page(struct nvme_ns *ns, struct sg_io_hdr *hdr,
 					u8 *inq_response, int alloc_len)
 {
-	__be32 max_sectors = cpu_to_be32(queue_max_hw_sectors(ns->queue));
+	__be32 max_sectors = cpu_to_be32(
+		nvme_block_nr(ns, queue_max_hw_sectors(ns->queue)));
 	__be32 max_discard = cpu_to_be32(ns->queue->limits.max_discard_sectors);
 	__be32 discard_desc_count = cpu_to_be32(0x100);
...
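The VPD B0 (Block Limits) MAXIMUM TRANSFER LENGTH field is expressed in logical blocks, while queue_max_hw_sectors() is in 512-byte sectors, so the value has to be scaled by the namespace's logical block size; that is what the added nvme_block_nr() call does. A small sketch of the conversion, assuming the usual shift-based definition of nvme_block_nr() (sectors >> (lba_shift - 9), where lba_shift is log2 of the logical block size):

#include <assert.h>

/* Assumed definition: convert 512-byte sectors to logical blocks. */
static unsigned long nvme_block_nr(unsigned int lba_shift, unsigned long sectors)
{
	return sectors >> (lba_shift - 9);
}

int main(void)
{
	unsigned long max_hw_sectors = 2048;	/* 1 MiB worth of 512-byte sectors */

	/* 512-byte logical blocks (lba_shift = 9): no scaling needed. */
	assert(nvme_block_nr(9, max_hw_sectors) == 2048);

	/* 4096-byte logical blocks (lba_shift = 12): the same 1 MiB is only
	 * 256 blocks; reporting the raw 2048 (the old code) overstates the
	 * limit by 8x on such a namespace. */
	assert(nvme_block_nr(12, max_hw_sectors) == 256);
	return 0;
}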
@@ -4818,12 +4818,12 @@ static void md_free(struct kobject *ko)
 	if (mddev->sysfs_state)
 		sysfs_put(mddev->sysfs_state);
 
+	if (mddev->queue)
+		blk_cleanup_queue(mddev->queue);
 	if (mddev->gendisk) {
 		del_gendisk(mddev->gendisk);
 		put_disk(mddev->gendisk);
 	}
-	if (mddev->queue)
-		blk_cleanup_queue(mddev->queue);
 
 	kfree(mddev);
 }
...
@@ -1161,7 +1161,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
 	long ret, bytes;
 	umode_t i_mode;
 	size_t len;
-	int i, flags;
+	int i, flags, more;
 
 	/*
 	 * We require the input being a regular file, as we don't want to
@@ -1204,6 +1204,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
 	 * Don't block on output, we have to drain the direct pipe.
 	 */
 	sd->flags &= ~SPLICE_F_NONBLOCK;
+	more = sd->flags & SPLICE_F_MORE;
 
 	while (len) {
 		size_t read_len;
@@ -1216,6 +1217,15 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
 		read_len = ret;
 		sd->total_len = read_len;
 
+		/*
+		 * If more data is pending, set SPLICE_F_MORE
+		 * If this is the last data and SPLICE_F_MORE was not set
+		 * initially, clears it.
+		 */
+		if (read_len < len)
+			sd->flags |= SPLICE_F_MORE;
+		else if (!more)
+			sd->flags &= ~SPLICE_F_MORE;
+
 		/*
 		 * NOTE: nonblocking mode only applies to the input. We
 		 * must not do the output in nonblocking mode as then we
...
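The fix keeps SPLICE_F_MORE set on the output side for every chunk that will be followed by more data, and only on the final chunk falls back to whatever the caller originally asked for; this is what lets a large sendfile() keep corking the socket across the internal chunking. A standalone sketch of just that flag decision, where chunk_flags() is a hypothetical helper mirroring the new logic (the flag value matches the uapi definition):

#include <assert.h>
#include <stdio.h>

#define SPLICE_F_MORE 0x04	/* same value as the uapi flag */

/* Flags to use for one output chunk: keep MORE set while data remains, and
 * on the final chunk fall back to what the caller originally requested. */
static unsigned int chunk_flags(unsigned int caller_flags, size_t read_len,
				size_t remaining)
{
	unsigned int flags = caller_flags;
	int more = caller_flags & SPLICE_F_MORE;

	if (read_len < remaining)
		flags |= SPLICE_F_MORE;		/* more chunks will follow */
	else if (!more)
		flags &= ~SPLICE_F_MORE;	/* last chunk, caller did not ask for MORE */
	return flags;
}

int main(void)
{
	/* sendfile() of 150 bytes moved in 64-byte chunks, caller did not set MORE. */
	assert(chunk_flags(0, 64, 150) & SPLICE_F_MORE);	/* first chunk  */
	assert(chunk_flags(0, 64,  86) & SPLICE_F_MORE);	/* middle chunk */
	assert(!(chunk_flags(0, 22, 22) & SPLICE_F_MORE));	/* final chunk  */

	/* If the caller itself passed SPLICE_F_MORE, the final chunk keeps it. */
	assert(chunk_flags(SPLICE_F_MORE, 22, 22) & SPLICE_F_MORE);
	printf("ok\n");
	return 0;
}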
@@ -220,7 +220,7 @@ enum rq_flag_bits {
 
 /* This mask is used for both bio and request merge checking */
 #define REQ_NOMERGE_FLAGS \
-	(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
+	(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_FLUSH_SEQ)
 
 #define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
 #define REQ_THROTTLED		(1ULL << __REQ_THROTTLED)
...
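REQ_FLUSH_SEQ marks a request that is still owned by the flush state machine; the FUA hang came from letting other I/O merge into such a request, so the fix makes any request carrying the flag unmergeable. Merge eligibility is a plain mask test against REQ_NOMERGE_FLAGS, sketched below with illustrative bit values only (in the kernel the bits come from enum rq_flag_bits):

#include <assert.h>
#include <stdbool.h>

/* Illustrative bit positions; not the kernel's actual values. */
#define REQ_NOMERGE	(1ULL << 0)
#define REQ_STARTED	(1ULL << 1)
#define REQ_SOFTBARRIER	(1ULL << 2)
#define REQ_FLUSH	(1ULL << 3)
#define REQ_FUA		(1ULL << 4)
#define REQ_FLUSH_SEQ	(1ULL << 5)

#define REQ_NOMERGE_FLAGS \
	(REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_FLUSH_SEQ)

/* Merge checking boils down to: neither side may carry a no-merge flag. */
static bool may_merge(unsigned long long rq_flags, unsigned long long bio_flags)
{
	return !(rq_flags & REQ_NOMERGE_FLAGS) && !(bio_flags & REQ_NOMERGE_FLAGS);
}

int main(void)
{
	/* A data request sitting in the flush state machine is now rejected by
	 * the merge path, so later writes can no longer be folded into it. */
	assert(!may_merge(REQ_FLUSH_SEQ, 0));
	assert(may_merge(0, 0));
	return 0;
}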
@@ -580,7 +580,7 @@ static long long pos_ratio_polynom(unsigned long setpoint,
 	long x;
 
 	x = div64_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
-		      limit - setpoint + 1);
+		      (limit - setpoint) | 1);
 	pos_ratio = x;
 	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
 	pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
@@ -807,7 +807,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
 	 * scale global setpoint to bdi's:
 	 *     bdi_setpoint = setpoint * bdi_thresh / thresh
 	 */
-	x = div_u64((u64)bdi_thresh << 16, thresh + 1);
+	x = div_u64((u64)bdi_thresh << 16, thresh | 1);
 	bdi_setpoint = setpoint * (u64)x >> 16;
 	/*
 	 * Use span=(8*write_bw) in single bdi case as indicated by
@@ -822,7 +822,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
 
 	if (bdi_dirty < x_intercept - span / 4) {
 		pos_ratio = div64_u64(pos_ratio * (x_intercept - bdi_dirty),
-				      x_intercept - bdi_setpoint + 1);
+				      (x_intercept - bdi_setpoint) | 1);
 	} else
 		pos_ratio /= 4;
...
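Why "| 1" is stronger than "+ 1": these writeback parameters are sampled without a common lock, so a difference such as limit - setpoint can underflow to ULONG_MAX, and adding 1 to that wraps straight back to 0, which is exactly the divide-by-zero being fixed. OR-ing with 1 forces the low bit on, so the divisor can never be zero, and in the ordinary case where the difference is 0 it still yields 1. A tiny demonstration:

#include <assert.h>
#include <limits.h>
#include <stdio.h>

int main(void)
{
	/* Racy sampling can make setpoint overtake limit, so the "distance"
	 * below underflows as an unsigned value. */
	unsigned long limit = 1000, setpoint = 1001;
	unsigned long distance = limit - setpoint;	/* underflows to ULONG_MAX */

	assert(distance == ULONG_MAX);
	assert(distance + 1 == 0);	/* the old "+ 1" guard wraps to zero */
	assert((distance | 1) != 0);	/* "| 1" can never be zero           */

	/* In the common, non-racy case both forms behave the same: the divisor
	 * stays at least 1 even when limit == setpoint. */
	setpoint = limit;
	distance = limit - setpoint;
	assert(distance + 1 == 1 && (distance | 1) == 1);

	printf("ok\n");
	return 0;
}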