Commit 873f1c8d authored by Jens Axboe

Merge branch 'block-5.7' into for-5.8/block

Pull in block-5.7 fixes for 5.8. Mostly to resolve a conflict with
the blk-iocost changes, but we also need the base of the bdi
use-after-free fix, as we build on top of it.

* block-5.7:
  nvme: fix possible hang when ns scanning fails during error recovery
  nvme-pci: fix "slimmer CQ head update"
  bdi: add a ->dev_name field to struct backing_dev_info
  bdi: use bdi_dev_name() to get device name
  bdi: move bdi_dev_name out of line
  vboxsf: don't use the source name in the bdi name
  iocost: protect iocg->abs_vdebt with iocg->waitq.lock
  block: remove the bd_openers checks in blk_drop_partitions
  nvme: prevent double free in nvme_alloc_ns() error handling
  null_blk: Cleanup zoned device initialization
  null_blk: Fix zoned command handling
  block: remove unused header
  blk-iocost: Fix error on iocost_ioc_vrate_adj
  bdev: Reduce time holding bd_mutex in sync in blkdev_close()
  buffer: remove useless comment and WB_REASON_FREE_MORE_MEM, reason.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parents 8b075e5b 59c7c3ca
...@@ -123,6 +123,7 @@ ...@@ -123,6 +123,7 @@
#include <linux/ioprio.h> #include <linux/ioprio.h>
#include <linux/sbitmap.h> #include <linux/sbitmap.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/backing-dev.h>
#include "blk.h" #include "blk.h"
#include "blk-mq.h" #include "blk-mq.h"
...@@ -4976,8 +4977,9 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) ...@@ -4976,8 +4977,9 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic)
ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio);
switch (ioprio_class) { switch (ioprio_class) {
default: default:
dev_err(bfqq->bfqd->queue->backing_dev_info->dev, pr_err("bdi %s: bfq: bad prio class %d\n",
"bfq: bad prio class %d\n", ioprio_class); bdi_dev_name(bfqq->bfqd->queue->backing_dev_info),
ioprio_class);
/* fall through */ /* fall through */
case IOPRIO_CLASS_NONE: case IOPRIO_CLASS_NONE:
/* /*
......
...@@ -496,7 +496,7 @@ const char *blkg_dev_name(struct blkcg_gq *blkg) ...@@ -496,7 +496,7 @@ const char *blkg_dev_name(struct blkcg_gq *blkg)
{ {
/* some drivers (floppy) instantiate a queue w/o disk registered */ /* some drivers (floppy) instantiate a queue w/o disk registered */
if (blkg->q->backing_dev_info->dev) if (blkg->q->backing_dev_info->dev)
return dev_name(blkg->q->backing_dev_info->dev); return bdi_dev_name(blkg->q->backing_dev_info);
return NULL; return NULL;
} }
......
...@@ -467,7 +467,7 @@ struct ioc_gq { ...@@ -467,7 +467,7 @@ struct ioc_gq {
*/ */
atomic64_t vtime; atomic64_t vtime;
atomic64_t done_vtime; atomic64_t done_vtime;
atomic64_t abs_vdebt; u64 abs_vdebt;
u64 last_vtime; u64 last_vtime;
/* /*
...@@ -1143,7 +1143,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now) ...@@ -1143,7 +1143,7 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
struct iocg_wake_ctx ctx = { .iocg = iocg }; struct iocg_wake_ctx ctx = { .iocg = iocg };
u64 margin_ns = (u64)(ioc->period_us * u64 margin_ns = (u64)(ioc->period_us *
WAITQ_TIMER_MARGIN_PCT / 100) * NSEC_PER_USEC; WAITQ_TIMER_MARGIN_PCT / 100) * NSEC_PER_USEC;
u64 abs_vdebt, vdebt, vshortage, expires, oexpires; u64 vdebt, vshortage, expires, oexpires;
s64 vbudget; s64 vbudget;
u32 hw_inuse; u32 hw_inuse;
...@@ -1153,18 +1153,15 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now) ...@@ -1153,18 +1153,15 @@ static void iocg_kick_waitq(struct ioc_gq *iocg, struct ioc_now *now)
vbudget = now->vnow - atomic64_read(&iocg->vtime); vbudget = now->vnow - atomic64_read(&iocg->vtime);
/* pay off debt */ /* pay off debt */
abs_vdebt = atomic64_read(&iocg->abs_vdebt); vdebt = abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
vdebt = abs_cost_to_cost(abs_vdebt, hw_inuse);
if (vdebt && vbudget > 0) { if (vdebt && vbudget > 0) {
u64 delta = min_t(u64, vbudget, vdebt); u64 delta = min_t(u64, vbudget, vdebt);
u64 abs_delta = min(cost_to_abs_cost(delta, hw_inuse), u64 abs_delta = min(cost_to_abs_cost(delta, hw_inuse),
abs_vdebt); iocg->abs_vdebt);
atomic64_add(delta, &iocg->vtime); atomic64_add(delta, &iocg->vtime);
atomic64_add(delta, &iocg->done_vtime); atomic64_add(delta, &iocg->done_vtime);
atomic64_sub(abs_delta, &iocg->abs_vdebt); iocg->abs_vdebt -= abs_delta;
if (WARN_ON_ONCE(atomic64_read(&iocg->abs_vdebt) < 0))
atomic64_set(&iocg->abs_vdebt, 0);
} }
/* /*
...@@ -1220,12 +1217,18 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now) ...@@ -1220,12 +1217,18 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
u64 delta_ns, expires, oexpires; u64 delta_ns, expires, oexpires;
u32 hw_inuse; u32 hw_inuse;
lockdep_assert_held(&iocg->waitq.lock);
/* debt-adjust vtime */ /* debt-adjust vtime */
current_hweight(iocg, NULL, &hw_inuse); current_hweight(iocg, NULL, &hw_inuse);
vtime += abs_cost_to_cost(atomic64_read(&iocg->abs_vdebt), hw_inuse); vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
/* clear or maintain depending on the overage */ /*
if (time_before_eq64(vtime, now->vnow)) { * Clear or maintain depending on the overage. Non-zero vdebt is what
* guarantees that @iocg is online and future iocg_kick_delay() will
* clear use_delay. Don't leave it on when there's no vdebt.
*/
if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) {
blkcg_clear_delay(blkg); blkcg_clear_delay(blkg);
return false; return false;
} }
...@@ -1254,9 +1257,12 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer) ...@@ -1254,9 +1257,12 @@ static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
{ {
struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer); struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer);
struct ioc_now now; struct ioc_now now;
unsigned long flags;
spin_lock_irqsave(&iocg->waitq.lock, flags);
ioc_now(iocg->ioc, &now); ioc_now(iocg->ioc, &now);
iocg_kick_delay(iocg, &now); iocg_kick_delay(iocg, &now);
spin_unlock_irqrestore(&iocg->waitq.lock, flags);
return HRTIMER_NORESTART; return HRTIMER_NORESTART;
} }
...@@ -1364,14 +1370,13 @@ static void ioc_timer_fn(struct timer_list *timer) ...@@ -1364,14 +1370,13 @@ static void ioc_timer_fn(struct timer_list *timer)
* should have woken up in the last period and expire idle iocgs. * should have woken up in the last period and expire idle iocgs.
*/ */
list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) { list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) {
if (!waitqueue_active(&iocg->waitq) && if (!waitqueue_active(&iocg->waitq) && iocg->abs_vdebt &&
!atomic64_read(&iocg->abs_vdebt) && !iocg_is_idle(iocg)) !iocg_is_idle(iocg))
continue; continue;
spin_lock(&iocg->waitq.lock); spin_lock(&iocg->waitq.lock);
if (waitqueue_active(&iocg->waitq) || if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt) {
atomic64_read(&iocg->abs_vdebt)) {
/* might be oversleeping vtime / hweight changes, kick */ /* might be oversleeping vtime / hweight changes, kick */
iocg_kick_waitq(iocg, &now); iocg_kick_waitq(iocg, &now);
iocg_kick_delay(iocg, &now); iocg_kick_delay(iocg, &now);
...@@ -1587,7 +1592,7 @@ static void ioc_timer_fn(struct timer_list *timer) ...@@ -1587,7 +1592,7 @@ static void ioc_timer_fn(struct timer_list *timer)
vrate_min, vrate_max); vrate_min, vrate_max);
} }
trace_iocost_ioc_vrate_adj(ioc, vrate, &missed_ppm, rq_wait_pct, trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct,
nr_lagging, nr_shortages, nr_lagging, nr_shortages,
nr_surpluses); nr_surpluses);
...@@ -1596,7 +1601,7 @@ static void ioc_timer_fn(struct timer_list *timer) ...@@ -1596,7 +1601,7 @@ static void ioc_timer_fn(struct timer_list *timer)
ioc->period_us * vrate * INUSE_MARGIN_PCT, 100); ioc->period_us * vrate * INUSE_MARGIN_PCT, 100);
} else if (ioc->busy_level != prev_busy_level || nr_lagging) { } else if (ioc->busy_level != prev_busy_level || nr_lagging) {
trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate), trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate),
&missed_ppm, rq_wait_pct, nr_lagging, missed_ppm, rq_wait_pct, nr_lagging,
nr_shortages, nr_surpluses); nr_shortages, nr_surpluses);
} }
...@@ -1739,28 +1744,49 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) ...@@ -1739,28 +1744,49 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
* tests are racy but the races aren't systemic - we only miss once * tests are racy but the races aren't systemic - we only miss once
* in a while which is fine. * in a while which is fine.
*/ */
if (!waitqueue_active(&iocg->waitq) && if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt &&
!atomic64_read(&iocg->abs_vdebt) &&
time_before_eq64(vtime + cost, now.vnow)) { time_before_eq64(vtime + cost, now.vnow)) {
iocg_commit_bio(iocg, bio, cost); iocg_commit_bio(iocg, bio, cost);
return; return;
} }
/* /*
* We're over budget. If @bio has to be issued regardless, * We activated above but w/o any synchronization. Deactivation is
* remember the abs_cost instead of advancing vtime. * synchronized with waitq.lock and we won't get deactivated as long
* iocg_kick_waitq() will pay off the debt before waking more IOs. * as we're waiting or has debt, so we're good if we're activated
* here. In the unlikely case that we aren't, just issue the IO.
*/
spin_lock_irq(&iocg->waitq.lock);
if (unlikely(list_empty(&iocg->active_list))) {
spin_unlock_irq(&iocg->waitq.lock);
iocg_commit_bio(iocg, bio, cost);
return;
}
/*
* We're over budget. If @bio has to be issued regardless, remember
* the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
* off the debt before waking more IOs.
*
* This way, the debt is continuously paid off each period with the * This way, the debt is continuously paid off each period with the
* actual budget available to the cgroup. If we just wound vtime, * actual budget available to the cgroup. If we just wound vtime, we
* we would incorrectly use the current hw_inuse for the entire * would incorrectly use the current hw_inuse for the entire amount
* amount which, for example, can lead to the cgroup staying * which, for example, can lead to the cgroup staying blocked for a
* blocked for a long time even with substantially raised hw_inuse. * long time even with substantially raised hw_inuse.
*
* An iocg with vdebt should stay online so that the timer can keep
* deducting its vdebt and [de]activate use_delay mechanism
* accordingly. We don't want to race against the timer trying to
* clear them and leave @iocg inactive w/ dangling use_delay heavily
* penalizing the cgroup and its descendants.
*/ */
if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) { if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) {
atomic64_add(abs_cost, &iocg->abs_vdebt); iocg->abs_vdebt += abs_cost;
if (iocg_kick_delay(iocg, &now)) if (iocg_kick_delay(iocg, &now))
blkcg_schedule_throttle(rqos->q, blkcg_schedule_throttle(rqos->q,
(bio->bi_opf & REQ_SWAP) == REQ_SWAP); (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
spin_unlock_irq(&iocg->waitq.lock);
return; return;
} }
...@@ -1777,20 +1803,6 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio) ...@@ -1777,20 +1803,6 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
* All waiters are on iocg->waitq and the wait states are * All waiters are on iocg->waitq and the wait states are
* synchronized using waitq.lock. * synchronized using waitq.lock.
*/ */
spin_lock_irq(&iocg->waitq.lock);
/*
* We activated above but w/o any synchronization. Deactivation is
* synchronized with waitq.lock and we won't get deactivated as
* long as we're waiting, so we're good if we're activated here.
* In the unlikely case that we are deactivated, just issue the IO.
*/
if (unlikely(list_empty(&iocg->active_list))) {
spin_unlock_irq(&iocg->waitq.lock);
iocg_commit_bio(iocg, bio, cost);
return;
}
init_waitqueue_func_entry(&wait.wait, iocg_wake_fn); init_waitqueue_func_entry(&wait.wait, iocg_wake_fn);
wait.wait.private = current; wait.wait.private = current;
wait.bio = bio; wait.bio = bio;
...@@ -1822,6 +1834,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, ...@@ -1822,6 +1834,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
struct ioc_now now; struct ioc_now now;
u32 hw_inuse; u32 hw_inuse;
u64 abs_cost, cost; u64 abs_cost, cost;
unsigned long flags;
/* bypass if disabled or for root cgroup */ /* bypass if disabled or for root cgroup */
if (!ioc->enabled || !iocg->level) if (!ioc->enabled || !iocg->level)
...@@ -1841,15 +1854,28 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq, ...@@ -1841,15 +1854,28 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
iocg->cursor = bio_end; iocg->cursor = bio_end;
/* /*
* Charge if there's enough vtime budget and the existing request * Charge if there's enough vtime budget and the existing request has
* has cost assigned. Otherwise, account it as debt. See debt * cost assigned.
* handling in ioc_rqos_throttle() for details.
*/ */
if (rq->bio && rq->bio->bi_iocost_cost && if (rq->bio && rq->bio->bi_iocost_cost &&
time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) {
iocg_commit_bio(iocg, bio, cost); iocg_commit_bio(iocg, bio, cost);
else return;
atomic64_add(abs_cost, &iocg->abs_vdebt); }
/*
* Otherwise, account it as debt if @iocg is online, which it should
* be for the vast majority of cases. See debt handling in
* ioc_rqos_throttle() for details.
*/
spin_lock_irqsave(&iocg->waitq.lock, flags);
if (likely(!list_empty(&iocg->active_list))) {
iocg->abs_vdebt += abs_cost;
iocg_kick_delay(iocg, &now);
} else {
iocg_commit_bio(iocg, bio, cost);
}
spin_unlock_irqrestore(&iocg->waitq.lock, flags);
} }
static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio) static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
...@@ -2021,7 +2047,6 @@ static void ioc_pd_init(struct blkg_policy_data *pd) ...@@ -2021,7 +2047,6 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
iocg->ioc = ioc; iocg->ioc = ioc;
atomic64_set(&iocg->vtime, now.vnow); atomic64_set(&iocg->vtime, now.vnow);
atomic64_set(&iocg->done_vtime, now.vnow); atomic64_set(&iocg->done_vtime, now.vnow);
atomic64_set(&iocg->abs_vdebt, 0);
atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period)); atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period));
INIT_LIST_HEAD(&iocg->active_list); INIT_LIST_HEAD(&iocg->active_list);
iocg->hweight_active = HWEIGHT_WHOLE; iocg->hweight_active = HWEIGHT_WHOLE;
......
...@@ -610,7 +610,7 @@ int blk_drop_partitions(struct block_device *bdev) ...@@ -610,7 +610,7 @@ int blk_drop_partitions(struct block_device *bdev)
if (!disk_part_scan_enabled(bdev->bd_disk)) if (!disk_part_scan_enabled(bdev->bd_disk))
return 0; return 0;
if (bdev->bd_part_count || bdev->bd_openers > 1) if (bdev->bd_part_count)
return -EBUSY; return -EBUSY;
sync_blockdev(bdev); sync_blockdev(bdev);
......
...@@ -85,26 +85,35 @@ struct nullb { ...@@ -85,26 +85,35 @@ struct nullb {
char disk_name[DISK_NAME_LEN]; char disk_name[DISK_NAME_LEN];
}; };
blk_status_t null_process_cmd(struct nullb_cmd *cmd,
enum req_opf op, sector_t sector,
unsigned int nr_sectors);
#ifdef CONFIG_BLK_DEV_ZONED #ifdef CONFIG_BLK_DEV_ZONED
int null_zone_init(struct nullb_device *dev); int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q);
void null_zone_exit(struct nullb_device *dev); int null_register_zoned_dev(struct nullb *nullb);
void null_free_zoned_dev(struct nullb_device *dev);
int null_report_zones(struct gendisk *disk, sector_t sector, int null_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data); unsigned int nr_zones, report_zones_cb cb, void *data);
blk_status_t null_handle_zoned(struct nullb_cmd *cmd, blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd,
enum req_opf op, sector_t sector, enum req_opf op, sector_t sector,
sector_t nr_sectors); sector_t nr_sectors);
size_t null_zone_valid_read_len(struct nullb *nullb, size_t null_zone_valid_read_len(struct nullb *nullb,
sector_t sector, unsigned int len); sector_t sector, unsigned int len);
#else #else
static inline int null_zone_init(struct nullb_device *dev) static inline int null_init_zoned_dev(struct nullb_device *dev,
struct request_queue *q)
{ {
pr_err("CONFIG_BLK_DEV_ZONED not enabled\n"); pr_err("CONFIG_BLK_DEV_ZONED not enabled\n");
return -EINVAL; return -EINVAL;
} }
static inline void null_zone_exit(struct nullb_device *dev) {} static inline int null_register_zoned_dev(struct nullb *nullb)
static inline blk_status_t null_handle_zoned(struct nullb_cmd *cmd, {
enum req_opf op, sector_t sector, return -ENODEV;
sector_t nr_sectors) }
static inline void null_free_zoned_dev(struct nullb_device *dev) {}
static inline blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd,
enum req_opf op, sector_t sector, sector_t nr_sectors)
{ {
return BLK_STS_NOTSUPP; return BLK_STS_NOTSUPP;
} }
......
...@@ -580,7 +580,7 @@ static void null_free_dev(struct nullb_device *dev) ...@@ -580,7 +580,7 @@ static void null_free_dev(struct nullb_device *dev)
if (!dev) if (!dev)
return; return;
null_zone_exit(dev); null_free_zoned_dev(dev);
badblocks_exit(&dev->badblocks); badblocks_exit(&dev->badblocks);
kfree(dev); kfree(dev);
} }
...@@ -1276,6 +1276,25 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd) ...@@ -1276,6 +1276,25 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
} }
} }
blk_status_t null_process_cmd(struct nullb_cmd *cmd,
enum req_opf op, sector_t sector,
unsigned int nr_sectors)
{
struct nullb_device *dev = cmd->nq->dev;
blk_status_t ret;
if (dev->badblocks.shift != -1) {
ret = null_handle_badblocks(cmd, sector, nr_sectors);
if (ret != BLK_STS_OK)
return ret;
}
if (dev->memory_backed)
return null_handle_memory_backed(cmd, op);
return BLK_STS_OK;
}
static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector, static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
sector_t nr_sectors, enum req_opf op) sector_t nr_sectors, enum req_opf op)
{ {
...@@ -1294,17 +1313,11 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector, ...@@ -1294,17 +1313,11 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
goto out; goto out;
} }
if (nullb->dev->badblocks.shift != -1) { if (dev->zoned)
cmd->error = null_handle_badblocks(cmd, sector, nr_sectors); cmd->error = null_process_zoned_cmd(cmd, op,
if (cmd->error != BLK_STS_OK) sector, nr_sectors);
goto out; else
} cmd->error = null_process_cmd(cmd, op, sector, nr_sectors);
if (dev->memory_backed)
cmd->error = null_handle_memory_backed(cmd, op);
if (!cmd->error && dev->zoned)
cmd->error = null_handle_zoned(cmd, op, sector, nr_sectors);
out: out:
nullb_complete_cmd(cmd); nullb_complete_cmd(cmd);
...@@ -1605,19 +1618,12 @@ static int null_gendisk_register(struct nullb *nullb) ...@@ -1605,19 +1618,12 @@ static int null_gendisk_register(struct nullb *nullb)
disk->queue = nullb->q; disk->queue = nullb->q;
strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN); strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
#ifdef CONFIG_BLK_DEV_ZONED
if (nullb->dev->zoned) { if (nullb->dev->zoned) {
if (queue_is_mq(nullb->q)) { int ret = null_register_zoned_dev(nullb);
int ret = blk_revalidate_disk_zones(disk);
if (ret) if (ret)
return ret; return ret;
} else {
blk_queue_chunk_sectors(nullb->q,
nullb->dev->zone_size_sects);
nullb->q->nr_zones = blkdev_nr_zones(disk);
}
} }
#endif
add_disk(disk); add_disk(disk);
return 0; return 0;
...@@ -1773,14 +1779,9 @@ static int null_add_dev(struct nullb_device *dev) ...@@ -1773,14 +1779,9 @@ static int null_add_dev(struct nullb_device *dev)
} }
if (dev->zoned) { if (dev->zoned) {
rv = null_zone_init(dev); rv = null_init_zoned_dev(dev, nullb->q);
if (rv) if (rv)
goto out_cleanup_blk_queue; goto out_cleanup_blk_queue;
nullb->q->limits.zoned = BLK_ZONED_HM;
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, nullb->q);
blk_queue_required_elevator_features(nullb->q,
ELEVATOR_F_ZBD_SEQ_WRITE);
} }
nullb->q->queuedata = nullb; nullb->q->queuedata = nullb;
...@@ -1809,8 +1810,7 @@ static int null_add_dev(struct nullb_device *dev) ...@@ -1809,8 +1810,7 @@ static int null_add_dev(struct nullb_device *dev)
return 0; return 0;
out_cleanup_zone: out_cleanup_zone:
if (dev->zoned) null_free_zoned_dev(dev);
null_zone_exit(dev);
out_cleanup_blk_queue: out_cleanup_blk_queue:
blk_cleanup_queue(nullb->q); blk_cleanup_queue(nullb->q);
out_cleanup_tags: out_cleanup_tags:
......
...@@ -13,7 +13,7 @@ static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect) ...@@ -13,7 +13,7 @@ static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect)
return sect >> ilog2(dev->zone_size_sects); return sect >> ilog2(dev->zone_size_sects);
} }
int null_zone_init(struct nullb_device *dev) int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q)
{ {
sector_t dev_size = (sector_t)dev->size * 1024 * 1024; sector_t dev_size = (sector_t)dev->size * 1024 * 1024;
sector_t sector = 0; sector_t sector = 0;
...@@ -61,10 +61,27 @@ int null_zone_init(struct nullb_device *dev) ...@@ -61,10 +61,27 @@ int null_zone_init(struct nullb_device *dev)
sector += dev->zone_size_sects; sector += dev->zone_size_sects;
} }
q->limits.zoned = BLK_ZONED_HM;
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
return 0;
}
int null_register_zoned_dev(struct nullb *nullb)
{
struct request_queue *q = nullb->q;
if (queue_is_mq(q))
return blk_revalidate_disk_zones(nullb->disk);
blk_queue_chunk_sectors(q, nullb->dev->zone_size_sects);
q->nr_zones = blkdev_nr_zones(nullb->disk);
return 0; return 0;
} }
void null_zone_exit(struct nullb_device *dev) void null_free_zoned_dev(struct nullb_device *dev)
{ {
kvfree(dev->zones); kvfree(dev->zones);
} }
...@@ -126,11 +143,16 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, ...@@ -126,11 +143,16 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
struct nullb_device *dev = cmd->nq->dev; struct nullb_device *dev = cmd->nq->dev;
unsigned int zno = null_zone_no(dev, sector); unsigned int zno = null_zone_no(dev, sector);
struct blk_zone *zone = &dev->zones[zno]; struct blk_zone *zone = &dev->zones[zno];
blk_status_t ret;
trace_nullb_zone_op(cmd, zno, zone->cond);
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
return null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
switch (zone->cond) { switch (zone->cond) {
case BLK_ZONE_COND_FULL: case BLK_ZONE_COND_FULL:
/* Cannot write to a full zone */ /* Cannot write to a full zone */
cmd->error = BLK_STS_IOERR;
return BLK_STS_IOERR; return BLK_STS_IOERR;
case BLK_ZONE_COND_EMPTY: case BLK_ZONE_COND_EMPTY:
case BLK_ZONE_COND_IMP_OPEN: case BLK_ZONE_COND_IMP_OPEN:
...@@ -143,19 +165,18 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, ...@@ -143,19 +165,18 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
if (zone->cond != BLK_ZONE_COND_EXP_OPEN) if (zone->cond != BLK_ZONE_COND_EXP_OPEN)
zone->cond = BLK_ZONE_COND_IMP_OPEN; zone->cond = BLK_ZONE_COND_IMP_OPEN;
ret = null_process_cmd(cmd, REQ_OP_WRITE, sector, nr_sectors);
if (ret != BLK_STS_OK)
return ret;
zone->wp += nr_sectors; zone->wp += nr_sectors;
if (zone->wp == zone->start + zone->len) if (zone->wp == zone->start + zone->len)
zone->cond = BLK_ZONE_COND_FULL; zone->cond = BLK_ZONE_COND_FULL;
break; return BLK_STS_OK;
case BLK_ZONE_COND_NOT_WP:
break;
default: default:
/* Invalid zone condition */ /* Invalid zone condition */
return BLK_STS_IOERR; return BLK_STS_IOERR;
} }
trace_nullb_zone_op(cmd, zno, zone->cond);
return BLK_STS_OK;
} }
static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
...@@ -216,7 +237,7 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, ...@@ -216,7 +237,7 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
return BLK_STS_OK; return BLK_STS_OK;
} }
blk_status_t null_handle_zoned(struct nullb_cmd *cmd, enum req_opf op, blk_status_t null_process_zoned_cmd(struct nullb_cmd *cmd, enum req_opf op,
sector_t sector, sector_t nr_sectors) sector_t sector, sector_t nr_sectors)
{ {
switch (op) { switch (op) {
...@@ -229,6 +250,6 @@ blk_status_t null_handle_zoned(struct nullb_cmd *cmd, enum req_opf op, ...@@ -229,6 +250,6 @@ blk_status_t null_handle_zoned(struct nullb_cmd *cmd, enum req_opf op,
case REQ_OP_ZONE_FINISH: case REQ_OP_ZONE_FINISH:
return null_zone_mgmt(cmd, op, sector); return null_zone_mgmt(cmd, op, sector);
default: default:
return BLK_STS_OK; return null_process_cmd(cmd, op, sector, nr_sectors);
} }
} }
...@@ -1110,7 +1110,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, ...@@ -1110,7 +1110,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
* Don't treat an error as fatal, as we potentially already * Don't treat an error as fatal, as we potentially already
* have a NGUID or EUI-64. * have a NGUID or EUI-64.
*/ */
if (status > 0) if (status > 0 && !(status & NVME_SC_DNR))
status = 0; status = 0;
goto free_data; goto free_data;
} }
...@@ -3642,6 +3642,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ...@@ -3642,6 +3642,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
return; return;
out_put_disk: out_put_disk:
/* prevent double queue cleanup */
ns->disk->queue = NULL;
put_disk(ns->disk); put_disk(ns->disk);
out_unlink_ns: out_unlink_ns:
mutex_lock(&ctrl->subsys->lock); mutex_lock(&ctrl->subsys->lock);
......
...@@ -973,9 +973,13 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) ...@@ -973,9 +973,13 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
{ {
if (++nvmeq->cq_head == nvmeq->q_depth) { u16 tmp = nvmeq->cq_head + 1;
if (tmp == nvmeq->q_depth) {
nvmeq->cq_head = 0; nvmeq->cq_head = 0;
nvmeq->cq_phase ^= 1; nvmeq->cq_phase ^= 1;
} else {
nvmeq->cq_head = tmp;
} }
} }
......
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/blkpg.h> #include <linux/blkpg.h>
#include <linux/magic.h> #include <linux/magic.h>
#include <linux/dax.h>
#include <linux/buffer_head.h> #include <linux/buffer_head.h>
#include <linux/swap.h> #include <linux/swap.h>
#include <linux/pagevec.h> #include <linux/pagevec.h>
...@@ -1876,6 +1875,16 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) ...@@ -1876,6 +1875,16 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
struct gendisk *disk = bdev->bd_disk; struct gendisk *disk = bdev->bd_disk;
struct block_device *victim = NULL; struct block_device *victim = NULL;
/*
* Sync early if it looks like we're the last one. If someone else
* opens the block device between now and the decrement of bd_openers
* then we did a sync that we didn't need to, but that's not the end
* of the world and we want to avoid long (could be several minute)
* syncs while holding the mutex.
*/
if (bdev->bd_openers == 1)
sync_blockdev(bdev);
mutex_lock_nested(&bdev->bd_mutex, for_part); mutex_lock_nested(&bdev->bd_mutex, for_part);
if (for_part) if (for_part)
bdev->bd_part_count--; bdev->bd_part_count--;
......
...@@ -967,7 +967,7 @@ grow_dev_page(struct block_device *bdev, sector_t block, ...@@ -967,7 +967,7 @@ grow_dev_page(struct block_device *bdev, sector_t block,
struct page *page; struct page *page;
struct buffer_head *bh; struct buffer_head *bh;
sector_t end_block; sector_t end_block;
int ret = 0; /* Will call free_more_memory() */ int ret = 0;
gfp_t gfp_mask; gfp_t gfp_mask;
gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp; gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp;
......
...@@ -271,7 +271,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) ...@@ -271,7 +271,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
&congestion_kb_fops); &congestion_kb_fops);
snprintf(name, sizeof(name), "../../bdi/%s", snprintf(name, sizeof(name), "../../bdi/%s",
dev_name(fsc->sb->s_bdi->dev)); bdi_dev_name(fsc->sb->s_bdi));
fsc->debugfs_bdi = fsc->debugfs_bdi =
debugfs_create_symlink("bdi", debugfs_create_symlink("bdi",
fsc->client->debugfs_dir, fsc->client->debugfs_dir,
......
...@@ -164,7 +164,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) ...@@ -164,7 +164,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
goto fail_free; goto fail_free;
} }
err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id); err = super_setup_bdi_name(sb, "vboxsf-%d", sbi->bdi_id);
if (err) if (err)
goto fail_free; goto fail_free;
......
...@@ -54,7 +54,6 @@ enum wb_reason { ...@@ -54,7 +54,6 @@ enum wb_reason {
WB_REASON_SYNC, WB_REASON_SYNC,
WB_REASON_PERIODIC, WB_REASON_PERIODIC,
WB_REASON_LAPTOP_TIMER, WB_REASON_LAPTOP_TIMER,
WB_REASON_FREE_MORE_MEM,
WB_REASON_FS_FREE_SPACE, WB_REASON_FS_FREE_SPACE,
/* /*
* There is no bdi forker thread any more and works are done * There is no bdi forker thread any more and works are done
...@@ -220,6 +219,7 @@ struct backing_dev_info { ...@@ -220,6 +219,7 @@ struct backing_dev_info {
wait_queue_head_t wb_waitq; wait_queue_head_t wb_waitq;
struct device *dev; struct device *dev;
char dev_name[64];
struct device *owner; struct device *owner;
struct timer_list laptop_mode_wb_timer; struct timer_list laptop_mode_wb_timer;
......
...@@ -505,13 +505,6 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) ...@@ -505,13 +505,6 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi)
(1 << WB_async_congested)); (1 << WB_async_congested));
} }
extern const char *bdi_unknown_name; const char *bdi_dev_name(struct backing_dev_info *bdi);
static inline const char *bdi_dev_name(struct backing_dev_info *bdi)
{
if (!bdi || !bdi->dev)
return bdi_unknown_name;
return dev_name(bdi->dev);
}
#endif /* _LINUX_BACKING_DEV_H */ #endif /* _LINUX_BACKING_DEV_H */
...@@ -130,7 +130,7 @@ DEFINE_EVENT(iocg_inuse_update, iocost_inuse_reset, ...@@ -130,7 +130,7 @@ DEFINE_EVENT(iocg_inuse_update, iocost_inuse_reset,
TRACE_EVENT(iocost_ioc_vrate_adj, TRACE_EVENT(iocost_ioc_vrate_adj,
TP_PROTO(struct ioc *ioc, u64 new_vrate, u32 (*missed_ppm)[2], TP_PROTO(struct ioc *ioc, u64 new_vrate, u32 *missed_ppm,
u32 rq_wait_pct, int nr_lagging, int nr_shortages, u32 rq_wait_pct, int nr_lagging, int nr_shortages,
int nr_surpluses), int nr_surpluses),
...@@ -155,8 +155,8 @@ TRACE_EVENT(iocost_ioc_vrate_adj, ...@@ -155,8 +155,8 @@ TRACE_EVENT(iocost_ioc_vrate_adj,
__entry->old_vrate = atomic64_read(&ioc->vtime_rate);; __entry->old_vrate = atomic64_read(&ioc->vtime_rate);;
__entry->new_vrate = new_vrate; __entry->new_vrate = new_vrate;
__entry->busy_level = ioc->busy_level; __entry->busy_level = ioc->busy_level;
__entry->read_missed_ppm = (*missed_ppm)[READ]; __entry->read_missed_ppm = missed_ppm[READ];
__entry->write_missed_ppm = (*missed_ppm)[WRITE]; __entry->write_missed_ppm = missed_ppm[WRITE];
__entry->rq_wait_pct = rq_wait_pct; __entry->rq_wait_pct = rq_wait_pct;
__entry->nr_lagging = nr_lagging; __entry->nr_lagging = nr_lagging;
__entry->nr_shortages = nr_shortages; __entry->nr_shortages = nr_shortages;
......
...@@ -33,7 +33,7 @@ TRACE_EVENT(wbt_stat, ...@@ -33,7 +33,7 @@ TRACE_EVENT(wbt_stat,
), ),
TP_fast_assign( TP_fast_assign(
strlcpy(__entry->name, dev_name(bdi->dev), strlcpy(__entry->name, bdi_dev_name(bdi),
ARRAY_SIZE(__entry->name)); ARRAY_SIZE(__entry->name));
__entry->rmean = stat[0].mean; __entry->rmean = stat[0].mean;
__entry->rmin = stat[0].min; __entry->rmin = stat[0].min;
...@@ -68,7 +68,7 @@ TRACE_EVENT(wbt_lat, ...@@ -68,7 +68,7 @@ TRACE_EVENT(wbt_lat,
), ),
TP_fast_assign( TP_fast_assign(
strlcpy(__entry->name, dev_name(bdi->dev), strlcpy(__entry->name, bdi_dev_name(bdi),
ARRAY_SIZE(__entry->name)); ARRAY_SIZE(__entry->name));
__entry->lat = div_u64(lat, 1000); __entry->lat = div_u64(lat, 1000);
), ),
...@@ -105,7 +105,7 @@ TRACE_EVENT(wbt_step, ...@@ -105,7 +105,7 @@ TRACE_EVENT(wbt_step,
), ),
TP_fast_assign( TP_fast_assign(
strlcpy(__entry->name, dev_name(bdi->dev), strlcpy(__entry->name, bdi_dev_name(bdi),
ARRAY_SIZE(__entry->name)); ARRAY_SIZE(__entry->name));
__entry->msg = msg; __entry->msg = msg;
__entry->step = step; __entry->step = step;
...@@ -141,7 +141,7 @@ TRACE_EVENT(wbt_timer, ...@@ -141,7 +141,7 @@ TRACE_EVENT(wbt_timer,
), ),
TP_fast_assign( TP_fast_assign(
strlcpy(__entry->name, dev_name(bdi->dev), strlcpy(__entry->name, bdi_dev_name(bdi),
ARRAY_SIZE(__entry->name)); ARRAY_SIZE(__entry->name));
__entry->status = status; __entry->status = status;
__entry->step = step; __entry->step = step;
......
...@@ -36,7 +36,6 @@ ...@@ -36,7 +36,6 @@
EM( WB_REASON_SYNC, "sync") \ EM( WB_REASON_SYNC, "sync") \
EM( WB_REASON_PERIODIC, "periodic") \ EM( WB_REASON_PERIODIC, "periodic") \
EM( WB_REASON_LAPTOP_TIMER, "laptop_timer") \ EM( WB_REASON_LAPTOP_TIMER, "laptop_timer") \
EM( WB_REASON_FREE_MORE_MEM, "free_more_memory") \
EM( WB_REASON_FS_FREE_SPACE, "fs_free_space") \ EM( WB_REASON_FS_FREE_SPACE, "fs_free_space") \
EMe(WB_REASON_FORKER_THREAD, "forker_thread") EMe(WB_REASON_FORKER_THREAD, "forker_thread")
......
...@@ -21,7 +21,7 @@ struct backing_dev_info noop_backing_dev_info = { ...@@ -21,7 +21,7 @@ struct backing_dev_info noop_backing_dev_info = {
EXPORT_SYMBOL_GPL(noop_backing_dev_info); EXPORT_SYMBOL_GPL(noop_backing_dev_info);
static struct class *bdi_class; static struct class *bdi_class;
const char *bdi_unknown_name = "(unknown)"; static const char *bdi_unknown_name = "(unknown)";
/* /*
* bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU * bdi_lock protects bdi_tree and updates to bdi_list. bdi_list has RCU
...@@ -938,7 +938,8 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) ...@@ -938,7 +938,8 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
if (bdi->dev) /* The driver needs to use separate queues per device */ if (bdi->dev) /* The driver needs to use separate queues per device */
return 0; return 0;
dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args); vsnprintf(bdi->dev_name, sizeof(bdi->dev_name), fmt, args);
dev = device_create(bdi_class, NULL, MKDEV(0, 0), bdi, bdi->dev_name);
if (IS_ERR(dev)) if (IS_ERR(dev))
return PTR_ERR(dev); return PTR_ERR(dev);
...@@ -1043,6 +1044,14 @@ void bdi_put(struct backing_dev_info *bdi) ...@@ -1043,6 +1044,14 @@ void bdi_put(struct backing_dev_info *bdi)
} }
EXPORT_SYMBOL(bdi_put); EXPORT_SYMBOL(bdi_put);
const char *bdi_dev_name(struct backing_dev_info *bdi)
{
if (!bdi || !bdi->dev)
return bdi_unknown_name;
return bdi->dev_name;
}
EXPORT_SYMBOL_GPL(bdi_dev_name);
static wait_queue_head_t congestion_wqh[2] = { static wait_queue_head_t congestion_wqh[2] = {
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
......
...@@ -160,7 +160,12 @@ class IocgStat: ...@@ -160,7 +160,12 @@ class IocgStat:
else: else:
self.inflight_pct = 0 self.inflight_pct = 0
# vdebt used to be an atomic64_t and is now u64, support both
try:
self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000 self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000
except:
self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000
self.use_delay = blkg.use_delay.counter.value_() self.use_delay = blkg.use_delay.counter.value_()
self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000 self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment