Commit 439b84fa authored by Jens Axboe

Merge branch 'for-5.5/block' into for-5.5/drivers

Pull in dependencies for the new zoned open/close/finish support.

* for-5.5/block: (32 commits)
  block: add zone open, close and finish ioctl support
  block: add zone open, close and finish operations
  block: Simplify REQ_OP_ZONE_RESET_ALL handling
  block: Remove REQ_OP_ZONE_RESET plugging
  block: Warn if elevator= parameter is used
  block: avoid blk_bio_segment_split for small I/O operations
  blk-mq: make sure that line break can be printed
  block: sed-opal: Introduce Opal Datastore UID
  block: sed-opal: Add support to read/write opal tables generically
  block: sed-opal: Generalizing write data to any opal table
  bdev: Refresh bdev size for disks without partitioning
  bdev: Factor out bdev revalidation into a common helper
  blk-mq: avoid sysfs buffer overflow with too many CPU cores
  blk-mq: Make blk_mq_run_hw_queue() return void
  fcntl: fix typo in RWH_WRITE_LIFE_NOT_SET r/w hint name
  blk-mq: fill header with kernel-doc
  blk-mq: remove needless goto from blk_mq_get_driver_tag
  block: reorder bio::__bi_remaining for better packing
  block: Reduce the amount of memory used for tag sets
  block: Reduce the amount of memory required per request queue
  ...
parents 64fab729 e876df1f
......@@ -132,6 +132,9 @@ static const char *const blk_op_name[] = {
REQ_OP_NAME(SECURE_ERASE),
REQ_OP_NAME(ZONE_RESET),
REQ_OP_NAME(ZONE_RESET_ALL),
REQ_OP_NAME(ZONE_OPEN),
REQ_OP_NAME(ZONE_CLOSE),
REQ_OP_NAME(ZONE_FINISH),
REQ_OP_NAME(WRITE_SAME),
REQ_OP_NAME(WRITE_ZEROES),
REQ_OP_NAME(SCSI_IN),
......@@ -336,14 +339,14 @@ EXPORT_SYMBOL_GPL(blk_set_queue_dying);
*/
void blk_cleanup_queue(struct request_queue *q)
{
WARN_ON_ONCE(blk_queue_registered(q));
/* mark @q DYING, no new request or merges will be allowed afterwards */
mutex_lock(&q->sysfs_lock);
blk_set_queue_dying(q);
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
blk_queue_flag_set(QUEUE_FLAG_DYING, q);
mutex_unlock(&q->sysfs_lock);
/*
* Drain all requests queued before DYING marking. Set DEAD flag to
......@@ -849,10 +852,10 @@ static inline int blk_partition_remap(struct bio *bio)
goto out;
/*
* Zone reset does not include bi_size so bio_sectors() is always 0.
* Include a test for the reset op code and perform the remap if needed.
* Zone management bios do not have a sector count but they do have
* a start sector filled out and need to be remapped.
*/
if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) {
if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio))) {
if (bio_check_eod(bio, part_nr_sects_read(p)))
goto out;
bio->bi_iter.bi_sector += p->start_sect;
......@@ -936,6 +939,9 @@ generic_make_request_checks(struct bio *bio)
goto not_supported;
break;
case REQ_OP_ZONE_RESET:
case REQ_OP_ZONE_OPEN:
case REQ_OP_ZONE_CLOSE:
case REQ_OP_ZONE_FINISH:
if (!blk_queue_is_zoned(q))
goto not_supported;
break;
......
......@@ -55,6 +55,8 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
rq->rq_disk = bd_disk;
rq->end_io = done;
blk_account_io_start(rq, true);
/*
* don't check dying flag for MQ because the request won't
* be reused after dying flag is set
......
......@@ -293,7 +293,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
void __blk_queue_split(struct request_queue *q, struct bio **bio,
unsigned int *nr_segs)
{
struct bio *split;
struct bio *split = NULL;
switch (bio_op(*bio)) {
case REQ_OP_DISCARD:
......@@ -309,6 +309,20 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio,
nr_segs);
break;
default:
/*
* All drivers must accept single-segment bios that are <=
* PAGE_SIZE. This is a quick and dirty check that relies on
* the fact that bi_io_vec[0] is always valid if a bio has data.
* The check might lead to occasional false negatives when bios
* are cloned, but compared to the performance impact of cloned
* bios themselves the loop below doesn't matter anyway.
*/
if (!q->limits.chunk_sectors &&
(*bio)->bi_vcnt == 1 &&
(*bio)->bi_io_vec[0].bv_len <= PAGE_SIZE) {
*nr_segs = 1;
break;
}
split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs);
break;
}
......
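
As a rough illustration (not part of the patch), the fast-path condition added above can be read as a standalone predicate; the helper name below is invented and the sketch assumes kernel context.

/*
 * Hypothetical restatement of the fast-path test: a bio may bypass
 * blk_bio_segment_split() when the queue imposes no chunk_sectors
 * boundary and the bio carries a single bio_vec of at most PAGE_SIZE bytes.
 */
static inline bool bio_may_skip_split(struct request_queue *q, struct bio *bio)
{
	return !q->limits.chunk_sectors &&
	       bio->bi_vcnt == 1 &&
	       bio->bi_io_vec[0].bv_len <= PAGE_SIZE;
}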
......@@ -74,9 +74,7 @@ static ssize_t blk_mq_sysfs_show(struct kobject *kobj, struct attribute *attr,
if (!entry->show)
return -EIO;
res = -ENOENT;
mutex_lock(&q->sysfs_lock);
if (!blk_queue_dying(q))
res = entry->show(ctx, page);
mutex_unlock(&q->sysfs_lock);
return res;
......@@ -97,9 +95,7 @@ static ssize_t blk_mq_sysfs_store(struct kobject *kobj, struct attribute *attr,
if (!entry->store)
return -EIO;
res = -ENOENT;
mutex_lock(&q->sysfs_lock);
if (!blk_queue_dying(q))
res = entry->store(ctx, page, length);
mutex_unlock(&q->sysfs_lock);
return res;
......@@ -120,9 +116,7 @@ static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj,
if (!entry->show)
return -EIO;
res = -ENOENT;
mutex_lock(&q->sysfs_lock);
if (!blk_queue_dying(q))
res = entry->show(hctx, page);
mutex_unlock(&q->sysfs_lock);
return res;
......@@ -144,9 +138,7 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj,
if (!entry->store)
return -EIO;
res = -ENOENT;
mutex_lock(&q->sysfs_lock);
if (!blk_queue_dying(q))
res = entry->store(hctx, page, length);
mutex_unlock(&q->sysfs_lock);
return res;
......@@ -166,20 +158,25 @@ static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx,
static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
{
const size_t size = PAGE_SIZE - 1;
unsigned int i, first = 1;
ssize_t ret = 0;
int ret = 0, pos = 0;
for_each_cpu(i, hctx->cpumask) {
if (first)
ret += sprintf(ret + page, "%u", i);
ret = snprintf(pos + page, size - pos, "%u", i);
else
ret += sprintf(ret + page, ", %u", i);
ret = snprintf(pos + page, size - pos, ", %u", i);
if (ret >= size - pos)
break;
first = 0;
pos += ret;
}
ret += sprintf(ret + page, "\n");
return ret;
ret = snprintf(pos + page, size + 1 - pos, "\n");
return pos + ret;
}
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
......
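
The rewritten blk_mq_hw_sysfs_cpus_show() above bounds every write to the space left in the one-page sysfs buffer instead of appending blindly with sprintf(). The same pos/size accumulation pattern can be modelled in plain userspace C; the sketch below (buffer size and CPU list are invented) is only a model of the pattern, not the kernel function.

#include <stdio.h>

int main(void)
{
	char page[64];				/* stand-in for the sysfs page */
	const size_t size = sizeof(page) - 1;
	unsigned int cpus[] = { 0, 1, 2, 3, 8, 9, 10, 11 };
	int ret = 0, pos = 0, first = 1;
	size_t i;

	for (i = 0; i < sizeof(cpus) / sizeof(cpus[0]); i++) {
		/* Each write is limited to the remaining space. */
		ret = snprintf(page + pos, size - pos,
			       first ? "%u" : ", %u", cpus[i]);
		if (ret >= (int)(size - pos))
			break;			/* would overflow: stop early */
		first = 0;
		pos += ret;
	}
	ret = snprintf(page + pos, size + 1 - pos, "\n");
	/* a sysfs handler would return pos + ret here */
	printf("%s", page);
	return 0;
}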
......@@ -93,7 +93,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
struct mq_inflight {
struct hd_struct *part;
unsigned int *inflight;
unsigned int inflight[2];
};
static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
......@@ -102,45 +102,29 @@ static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx,
{
struct mq_inflight *mi = priv;
/*
* index[0] counts the specific partition that was asked for.
*/
if (rq->part == mi->part)
mi->inflight[0]++;
mi->inflight[rq_data_dir(rq)]++;
return true;
}
unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part)
{
unsigned inflight[2];
struct mq_inflight mi = { .part = part, .inflight = inflight, };
struct mq_inflight mi = { .part = part };
inflight[0] = inflight[1] = 0;
blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
return inflight[0];
}
static bool blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx,
struct request *rq, void *priv,
bool reserved)
{
struct mq_inflight *mi = priv;
if (rq->part == mi->part)
mi->inflight[rq_data_dir(rq)]++;
return true;
return mi.inflight[0] + mi.inflight[1];
}
void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part,
unsigned int inflight[2])
{
struct mq_inflight mi = { .part = part, .inflight = inflight, };
struct mq_inflight mi = { .part = part };
inflight[0] = inflight[1] = 0;
blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi);
blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi);
inflight[0] = mi.inflight[0];
inflight[1] = mi.inflight[1];
}
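
With this change a single callback fills both per-direction counters, indexed by rq_data_dir(), and blk_mq_in_flight() simply sums the two entries. A hedged, kernel-context sketch of a hypothetical caller that wants the split counts:

/* Hypothetical caller (sketch only). */
unsigned int inflight[2];

blk_mq_in_flight_rw(q, part, inflight);
pr_debug("%u reads and %u writes in flight\n",
	 inflight[READ], inflight[WRITE]);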
void blk_freeze_queue_start(struct request_queue *q)
......@@ -663,18 +647,6 @@ bool blk_mq_complete_request(struct request *rq)
}
EXPORT_SYMBOL(blk_mq_complete_request);
int blk_mq_request_started(struct request *rq)
{
return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
}
EXPORT_SYMBOL_GPL(blk_mq_request_started);
int blk_mq_request_completed(struct request *rq)
{
return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE;
}
EXPORT_SYMBOL_GPL(blk_mq_request_completed);
void blk_mq_start_request(struct request *rq)
{
struct request_queue *q = rq->q;
......@@ -1064,7 +1036,7 @@ bool blk_mq_get_driver_tag(struct request *rq)
bool shared;
if (rq->tag != -1)
goto done;
return true;
if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
data.flags |= BLK_MQ_REQ_RESERVED;
......@@ -1079,7 +1051,6 @@ bool blk_mq_get_driver_tag(struct request *rq)
data.hctx->tags->rqs[rq->tag] = rq;
}
done:
return rq->tag != -1;
}
......@@ -1486,7 +1457,7 @@ void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
}
EXPORT_SYMBOL(blk_mq_delay_run_hw_queue);
bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
{
int srcu_idx;
bool need_run;
......@@ -1504,12 +1475,8 @@ bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
blk_mq_hctx_has_pending(hctx);
hctx_unlock(hctx, srcu_idx);
if (need_run) {
if (need_run)
__blk_mq_delay_run_hw_queue(hctx, async, 0);
return true;
}
return false;
}
EXPORT_SYMBOL(blk_mq_run_hw_queue);
......@@ -2789,6 +2756,23 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
int i, j, end;
struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx;
if (q->nr_hw_queues < set->nr_hw_queues) {
struct blk_mq_hw_ctx **new_hctxs;
new_hctxs = kcalloc_node(set->nr_hw_queues,
sizeof(*new_hctxs), GFP_KERNEL,
set->numa_node);
if (!new_hctxs)
return;
if (hctxs)
memcpy(new_hctxs, hctxs, q->nr_hw_queues *
sizeof(*hctxs));
q->queue_hw_ctx = new_hctxs;
q->nr_hw_queues = set->nr_hw_queues;
kfree(hctxs);
hctxs = new_hctxs;
}
/* protect against switching io scheduler */
mutex_lock(&q->sysfs_lock);
for (i = 0; i < set->nr_hw_queues; i++) {
......@@ -2844,19 +2828,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
mutex_unlock(&q->sysfs_lock);
}
/*
* Maximum number of hardware queues we support. For single sets, we'll never
* have more than the CPUs (software queues). For multiple sets, the tag_set
* user may have set ->nr_hw_queues larger.
*/
static unsigned int nr_hw_queues(struct blk_mq_tag_set *set)
{
if (set->nr_maps == 1)
return nr_cpu_ids;
return max(set->nr_hw_queues, nr_cpu_ids);
}
struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
struct request_queue *q,
bool elevator_init)
......@@ -2876,12 +2847,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
/* init q->mq_kobj and sw queues' kobjects */
blk_mq_sysfs_init(q);
q->nr_queues = nr_hw_queues(set);
q->queue_hw_ctx = kcalloc_node(q->nr_queues, sizeof(*(q->queue_hw_ctx)),
GFP_KERNEL, set->numa_node);
if (!q->queue_hw_ctx)
goto err_sys_init;
INIT_LIST_HEAD(&q->unused_hctx_list);
spin_lock_init(&q->unused_hctx_lock);
......@@ -2929,7 +2894,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
err_hctxs:
kfree(q->queue_hw_ctx);
q->nr_hw_queues = 0;
err_sys_init:
blk_mq_sysfs_deinit(q);
err_poll:
blk_stat_free_callback(q->poll_cb);
......@@ -3030,6 +2994,29 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
}
}
static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
int cur_nr_hw_queues, int new_nr_hw_queues)
{
struct blk_mq_tags **new_tags;
if (cur_nr_hw_queues >= new_nr_hw_queues)
return 0;
new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *),
GFP_KERNEL, set->numa_node);
if (!new_tags)
return -ENOMEM;
if (set->tags)
memcpy(new_tags, set->tags, cur_nr_hw_queues *
sizeof(*set->tags));
kfree(set->tags);
set->tags = new_tags;
set->nr_hw_queues = new_nr_hw_queues;
return 0;
}
/*
* Alloc a tag set to be associated with one or more request queues.
* May fail with EINVAL for various error conditions. May adjust the
......@@ -3083,9 +3070,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids)
set->nr_hw_queues = nr_cpu_ids;
set->tags = kcalloc_node(nr_hw_queues(set), sizeof(struct blk_mq_tags *),
GFP_KERNEL, set->numa_node);
if (!set->tags)
if (blk_mq_realloc_tag_set_tags(set, 0, set->nr_hw_queues) < 0)
return -ENOMEM;
ret = -ENOMEM;
......@@ -3126,7 +3111,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
{
int i, j;
for (i = 0; i < nr_hw_queues(set); i++)
for (i = 0; i < set->nr_hw_queues; i++)
blk_mq_free_map_and_requests(set, i);
for (j = 0; j < set->nr_maps; j++) {
......@@ -3270,10 +3255,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_freeze_queue(q);
/*
* Sync with blk_mq_queue_tag_busy_iter.
*/
synchronize_rcu();
/*
* Switch IO scheduler to 'none', cleaning up the data associated
* with the previous scheduler. We will switch back once we are done
......@@ -3288,6 +3269,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
blk_mq_sysfs_unregister(q);
}
if (blk_mq_realloc_tag_set_tags(set, set->nr_hw_queues, nr_hw_queues) <
0)
goto reregister;
prev_nr_hw_queues = set->nr_hw_queues;
set->nr_hw_queues = nr_hw_queues;
blk_mq_update_queue_map(set);
......@@ -3304,6 +3289,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
blk_mq_map_swqueue(q);
}
reregister:
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_sysfs_register(q);
blk_mq_debugfs_register_hctxs(q);
......
......@@ -128,15 +128,6 @@ extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
void blk_mq_release(struct request_queue *q);
/**
* blk_mq_rq_state() - read the current MQ_RQ_* state of a request
* @rq: target request.
*/
static inline enum mq_rq_state blk_mq_rq_state(struct request *rq)
{
return READ_ONCE(rq->state);
}
static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
unsigned int cpu)
{
......
......@@ -53,7 +53,7 @@ void blk_stat_add(struct request *rq, u64 now)
struct request_queue *q = rq->q;
struct blk_stat_callback *cb;
struct blk_rq_stat *stat;
int bucket;
int bucket, cpu;
u64 value;
value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0;
......@@ -61,6 +61,7 @@ void blk_stat_add(struct request *rq, u64 now)
blk_throtl_stat_add(rq, value);
rcu_read_lock();
cpu = get_cpu();
list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
if (!blk_stat_is_active(cb))
continue;
......@@ -69,10 +70,10 @@ void blk_stat_add(struct request *rq, u64 now)
if (bucket < 0)
continue;
stat = &get_cpu_ptr(cb->cpu_stat)[bucket];
stat = &per_cpu_ptr(cb->cpu_stat, cpu)[bucket];
blk_rq_stat_add(stat, value);
put_cpu_ptr(cb->cpu_stat);
}
put_cpu();
rcu_read_unlock();
}
......
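
The change above takes the CPU once with get_cpu() before walking the callback list and then addresses that CPU's buckets with per_cpu_ptr(), rather than doing a get_cpu_ptr()/put_cpu_ptr() round trip for every callback. A minimal kernel-context sketch of the pattern, with invented names:

/* Sketch only: disable preemption once, then index per-CPU data directly. */
int cpu = get_cpu();			/* pin to the current CPU */

list_for_each_entry_rcu(cb, &callbacks, list) {
	struct blk_rq_stat *stat = per_cpu_ptr(cb->cpu_stat, cpu);

	/* update this CPU's stat without toggling preemption per entry */
	blk_rq_stat_add(stat, value);
}
put_cpu();				/* re-enable preemption */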
......@@ -801,10 +801,6 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
if (!entry->show)
return -EIO;
mutex_lock(&q->sysfs_lock);
if (blk_queue_dying(q)) {
mutex_unlock(&q->sysfs_lock);
return -ENOENT;
}
res = entry->show(q, page);
mutex_unlock(&q->sysfs_lock);
return res;
......@@ -823,10 +819,6 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
q = container_of(kobj, struct request_queue, kobj);
mutex_lock(&q->sysfs_lock);
if (blk_queue_dying(q)) {
mutex_unlock(&q->sysfs_lock);
return -ENOENT;
}
res = entry->store(q, page, length);
mutex_unlock(&q->sysfs_lock);
return res;
......
......@@ -202,32 +202,14 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
/*
* Special case of zone reset operation to reset all zones in one command,
* useful for applications like mkfs.
*/
static int __blkdev_reset_all_zones(struct block_device *bdev, gfp_t gfp_mask)
{
struct bio *bio = bio_alloc(gfp_mask, 0);
int ret;
/* across the zones operations, don't need any sectors */
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, REQ_OP_ZONE_RESET_ALL, 0);
ret = submit_bio_wait(bio);
bio_put(bio);
return ret;
}
static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
sector_t sector,
sector_t nr_sectors)
{
if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
return false;
if (nr_sectors != part_nr_sects_read(bdev->bd_part))
if (sector || nr_sectors != part_nr_sects_read(bdev->bd_part))
return false;
/*
* REQ_OP_ZONE_RESET_ALL can be executed only if the block device is
......@@ -239,26 +221,29 @@ static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
}
/**
* blkdev_reset_zones - Reset zones write pointer
* blkdev_zone_mgmt - Execute a zone management operation on a range of zones
* @bdev: Target block device
* @sector: Start sector of the first zone to reset
* @nr_sectors: Number of sectors, at least the length of one zone
* @op: Operation to be performed on the zones
* @sector: Start sector of the first zone to operate on
* @nr_sectors: Number of sectors, should be at least the length of one zone and
* must be zone size aligned.
* @gfp_mask: Memory allocation flags (for bio_alloc)
*
* Description:
* Reset the write pointer of the zones contained in the range
* Perform the specified operation on the range of zones specified by
* @sector..@sector+@nr_sectors. Specifying the entire disk sector range
* is valid, but the specified range should not contain conventional zones.
* The operation to execute on each zone can be a zone reset, open, close
* or finish request.
*/
int blkdev_reset_zones(struct block_device *bdev,
int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
sector_t sector, sector_t nr_sectors,
gfp_t gfp_mask)
{
struct request_queue *q = bdev_get_queue(bdev);
sector_t zone_sectors;
sector_t zone_sectors = blk_queue_zone_sectors(q);
sector_t end_sector = sector + nr_sectors;
struct bio *bio = NULL;
struct blk_plug plug;
int ret;
if (!blk_queue_is_zoned(q))
......@@ -267,15 +252,14 @@ int blkdev_reset_zones(struct block_device *bdev,
if (bdev_read_only(bdev))
return -EPERM;
if (!op_is_zone_mgmt(op))
return -EOPNOTSUPP;
if (!nr_sectors || end_sector > bdev->bd_part->nr_sects)
/* Out of range */
return -EINVAL;
if (blkdev_allow_reset_all_zones(bdev, nr_sectors))
return __blkdev_reset_all_zones(bdev, gfp_mask);
/* Check alignment (handle eventual smaller last zone) */
zone_sectors = blk_queue_zone_sectors(q);
if (sector & (zone_sectors - 1))
return -EINVAL;
......@@ -283,29 +267,34 @@ int blkdev_reset_zones(struct block_device *bdev,
end_sector != bdev->bd_part->nr_sects)
return -EINVAL;
blk_start_plug(&plug);
while (sector < end_sector) {
bio = blk_next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
/*
* Special case for the zone reset operation that resets all
* zones; this is useful for applications like mkfs.
*/
if (op == REQ_OP_ZONE_RESET &&
blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
bio->bi_opf = REQ_OP_ZONE_RESET_ALL;
break;
}
bio->bi_opf = op;
bio->bi_iter.bi_sector = sector;
sector += zone_sectors;
/* This may take a while, so be nice to others */
cond_resched();
}
ret = submit_bio_wait(bio);
bio_put(bio);
blk_finish_plug(&plug);
return ret;
}
EXPORT_SYMBOL_GPL(blkdev_reset_zones);
EXPORT_SYMBOL_GPL(blkdev_zone_mgmt);
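
In-kernel users now pass the zone operation explicitly to blkdev_zone_mgmt() instead of calling blkdev_reset_zones(); the dm-zoned and f2fs hunks further down show real conversions. A hedged sketch of a hypothetical caller that finishes (fills) a single zone, assuming kernel context and a valid bdev:

/* Sketch only: transition one zone to the full state. */
sector_t zone_sectors = blk_queue_zone_sectors(bdev_get_queue(bdev));
int ret;

ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_FINISH,
		       zone_start_sector,	/* zone-aligned start (placeholder) */
		       zone_sectors,		/* exactly one zone */
		       GFP_KERNEL);
if (ret)
	pr_warn("zone finish failed: %d\n", ret);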
/*
* BLKREPORTZONE ioctl processing.
......@@ -368,15 +357,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
}
/*
* BLKRESETZONE ioctl processing.
* BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
* Called from blkdev_ioctl.
*/
int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
struct request_queue *q;
struct blk_zone_range zrange;
enum req_opf op;
if (!argp)
return -EINVAL;
......@@ -397,7 +387,24 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
return -EFAULT;
return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
switch (cmd) {
case BLKRESETZONE:
op = REQ_OP_ZONE_RESET;
break;
case BLKOPENZONE:
op = REQ_OP_ZONE_OPEN;
break;
case BLKCLOSEZONE:
op = REQ_OP_ZONE_CLOSE;
break;
case BLKFINISHZONE:
op = REQ_OP_ZONE_FINISH;
break;
default:
return -ENOTTY;
}
return blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
GFP_KERNEL);
}
......
......@@ -242,14 +242,11 @@ int blk_dev_init(void);
* Contribute to IO statistics IFF:
*
* a) it's attached to a gendisk, and
* b) the queue had IO stats enabled when this request was started, and
* c) it's a file system request
* b) the queue had IO stats enabled when this request was started
*/
static inline bool blk_do_io_stat(struct request *rq)
{
return rq->rq_disk &&
(rq->rq_flags & RQF_IO_STAT) &&
!blk_rq_is_passthrough(rq);
return rq->rq_disk && (rq->rq_flags & RQF_IO_STAT);
}
static inline void req_set_nomerge(struct request_queue *q, struct request *req)
......
......@@ -831,3 +831,12 @@ struct request *elv_rb_latter_request(struct request_queue *q,
return NULL;
}
EXPORT_SYMBOL(elv_rb_latter_request);
static int __init elevator_setup(char *str)
{
pr_warn("Kernel parameter elevator= does not have any effect anymore.\n"
"Please use sysfs to set IO scheduler for individual devices.\n");
return 1;
}
__setup("elevator=", elevator_setup);
......@@ -532,7 +532,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
case BLKREPORTZONE:
return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
case BLKRESETZONE:
return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
case BLKOPENZONE:
case BLKCLOSEZONE:
case BLKFINISHZONE:
return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg);
case BLKGETZONESZ:
return put_uint(arg, bdev_zone_sectors(bdev));
case BLKGETNRZONES:
......
......@@ -76,7 +76,6 @@ enum opal_response_token {
* Derived from: TCG_Storage_Architecture_Core_Spec_v2.01_r1.00
* Section: 6.3 Assigned UIDs
*/
#define OPAL_UID_LENGTH 8
#define OPAL_METHOD_LENGTH 8
#define OPAL_MSID_KEYLEN 15
#define OPAL_UID_LENGTH_HALF 4
......@@ -108,6 +107,7 @@ enum opal_uid {
OPAL_C_PIN_TABLE,
OPAL_LOCKING_INFO_TABLE,
OPAL_ENTERPRISE_LOCKING_INFO_TABLE,
OPAL_DATASTORE,
/* C_PIN_TABLE object ID's */
OPAL_C_PIN_MSID,
OPAL_C_PIN_SID,
......
......@@ -235,16 +235,12 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter)
return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION);
}
/**
* Type 3 does not have a reference tag so no remapping is required.
*/
/* Type 3 does not have a reference tag so no remapping is required. */
static void t10_pi_type3_prepare(struct request *rq)
{
}
/**
* Type 3 does not have a reference tag so no remapping is required.
*/
/* Type 3 does not have a reference tag so no remapping is required. */
static void t10_pi_type3_complete(struct request *rq, unsigned int nr_bytes)
{
}
......
......@@ -1312,7 +1312,7 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) {
struct dmz_dev *dev = zmd->dev;
ret = blkdev_reset_zones(dev->bdev,
ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET,
dmz_start_sect(zmd, zone),
dev->zone_nr_sectors, GFP_NOIO);
if (ret) {
......
......@@ -1403,10 +1403,6 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty)
"resized disk %s\n",
bdev->bd_disk ? bdev->bd_disk->disk_name : "");
}
if (!bdev->bd_disk)
return;
if (disk_part_scan_enabled(bdev->bd_disk))
bdev->bd_invalidated = 1;
}
......@@ -1512,6 +1508,19 @@ EXPORT_SYMBOL(bd_set_size);
static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
static void bdev_disk_changed(struct block_device *bdev, bool invalidate)
{
if (disk_part_scan_enabled(bdev->bd_disk)) {
if (invalidate)
invalidate_partitions(bdev->bd_disk, bdev);
else
rescan_partitions(bdev->bd_disk, bdev);
} else {
check_disk_size_change(bdev->bd_disk, bdev, !invalidate);
bdev->bd_invalidated = 0;
}
}
/*
* bd_mutex locking:
*
......@@ -1594,12 +1603,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
* The latter is necessary to prevent ghost
* partitions on a removed medium.
*/
if (bdev->bd_invalidated) {
if (!ret)
rescan_partitions(disk, bdev);
else if (ret == -ENOMEDIUM)
invalidate_partitions(disk, bdev);
}
if (bdev->bd_invalidated &&
(!ret || ret == -ENOMEDIUM))
bdev_disk_changed(bdev, ret == -ENOMEDIUM);
if (ret)
goto out_clear;
......@@ -1632,12 +1638,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
if (bdev->bd_disk->fops->open)
ret = bdev->bd_disk->fops->open(bdev, mode);
/* the same as first opener case, read comment there */
if (bdev->bd_invalidated) {
if (!ret)
rescan_partitions(bdev->bd_disk, bdev);
else if (ret == -ENOMEDIUM)
invalidate_partitions(bdev->bd_disk, bdev);
}
if (bdev->bd_invalidated &&
(!ret || ret == -ENOMEDIUM))
bdev_disk_changed(bdev, ret == -ENOMEDIUM);
if (ret)
goto out_unlock_bdev;
}
......
......@@ -1771,7 +1771,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
return -EIO;
}
trace_f2fs_issue_reset_zone(bdev, blkstart);
return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS);
return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
sector, nr_sects, GFP_NOFS);
}
/* For conventional zones, use regular discard if supported */
......
......@@ -261,7 +261,7 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
static bool rw_hint_valid(enum rw_hint hint)
{
switch (hint) {
case RWF_WRITE_LIFE_NOT_SET:
case RWH_WRITE_LIFE_NOT_SET:
case RWH_WRITE_LIFE_NONE:
case RWH_WRITE_LIFE_SHORT:
case RWH_WRITE_LIFE_MEDIUM:
......
......@@ -153,10 +153,10 @@ struct bio {
unsigned short bi_write_hint;
blk_status_t bi_status;
u8 bi_partno;
atomic_t __bi_remaining;
struct bvec_iter bi_iter;
atomic_t __bi_remaining;
bio_end_io_t *bi_end_io;
void *bi_private;
......@@ -290,6 +290,12 @@ enum req_opf {
REQ_OP_ZONE_RESET_ALL = 8,
/* write the zero filled sector many times */
REQ_OP_WRITE_ZEROES = 9,
/* Open a zone */
REQ_OP_ZONE_OPEN = 10,
/* Close a zone */
REQ_OP_ZONE_CLOSE = 11,
/* Transition a zone to full */
REQ_OP_ZONE_FINISH = 12,
/* SCSI passthrough using struct scsi_request */
REQ_OP_SCSI_IN = 32,
......@@ -417,6 +423,25 @@ static inline bool op_is_discard(unsigned int op)
return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
}
/*
* Check if a bio or request operation is a zone management operation, with
* the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case
* due to its different handling in the block layer and device response in
* case of command failure.
*/
static inline bool op_is_zone_mgmt(enum req_opf op)
{
switch (op & REQ_OP_MASK) {
case REQ_OP_ZONE_RESET:
case REQ_OP_ZONE_OPEN:
case REQ_OP_ZONE_CLOSE:
case REQ_OP_ZONE_FINISH:
return true;
default:
return false;
}
}
static inline int op_stat_group(unsigned int op)
{
if (op_is_discard(op))
......
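
op_is_zone_mgmt() lets the block layer and drivers treat the four per-zone operations uniformly, while REQ_OP_ZONE_RESET_ALL deliberately stays outside the helper. A hedged sketch of how a zoned driver's dispatch path might use it; the mydrv_* helpers are invented:

/* Sketch only: route zone management requests to a dedicated path. */
static blk_status_t mydrv_dispatch(struct request *rq)
{
	enum req_opf op = req_op(rq);

	if (op_is_zone_mgmt(op))
		/* one command per zone: reset, open, close or finish */
		return mydrv_issue_zone_mgmt(rq, op);	/* hypothetical */

	if (op == REQ_OP_ZONE_RESET_ALL)
		/* handled separately: a single command covering all zones */
		return mydrv_reset_all_zones(rq);	/* hypothetical */

	return mydrv_issue_rw(rq);			/* hypothetical */
}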
......@@ -360,13 +360,14 @@ extern unsigned int blkdev_nr_zones(struct block_device *bdev);
extern int blkdev_report_zones(struct block_device *bdev,
sector_t sector, struct blk_zone *zones,
unsigned int *nr_zones);
extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
sector_t nr_sectors, gfp_t gfp_mask);
extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
sector_t sectors, sector_t nr_sectors,
gfp_t gfp_mask);
extern int blk_revalidate_disk_zones(struct gendisk *disk);
extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg);
extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg);
#else /* CONFIG_BLK_DEV_ZONED */
......@@ -388,7 +389,7 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
return -ENOTTY;
}
static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
fmode_t mode, unsigned int cmd,
unsigned long arg)
{
......@@ -411,7 +412,6 @@ struct request_queue {
/* sw queues */
struct blk_mq_ctx __percpu *queue_ctx;
unsigned int nr_queues;
unsigned int queue_depth;
......
......@@ -42,6 +42,7 @@ static inline bool is_sed_ioctl(unsigned int cmd)
case IOC_OPAL_PSID_REVERT_TPR:
case IOC_OPAL_MBR_DONE:
case IOC_OPAL_WRITE_SHADOW_MBR:
case IOC_OPAL_GENERIC_TABLE_RW:
return true;
}
return false;
......
......@@ -33,7 +33,8 @@ TRACE_EVENT(wbt_stat,
),
TP_fast_assign(
strncpy(__entry->name, dev_name(bdi->dev), 32);
strlcpy(__entry->name, dev_name(bdi->dev),
ARRAY_SIZE(__entry->name));
__entry->rmean = stat[0].mean;
__entry->rmin = stat[0].min;
__entry->rmax = stat[0].max;
......@@ -67,7 +68,8 @@ TRACE_EVENT(wbt_lat,
),
TP_fast_assign(
strncpy(__entry->name, dev_name(bdi->dev), 32);
strlcpy(__entry->name, dev_name(bdi->dev),
ARRAY_SIZE(__entry->name));
__entry->lat = div_u64(lat, 1000);
),
......@@ -103,7 +105,8 @@ TRACE_EVENT(wbt_step,
),
TP_fast_assign(
strncpy(__entry->name, dev_name(bdi->dev), 32);
strlcpy(__entry->name, dev_name(bdi->dev),
ARRAY_SIZE(__entry->name));
__entry->msg = msg;
__entry->step = step;
__entry->window = div_u64(window, 1000);
......@@ -138,7 +141,8 @@ TRACE_EVENT(wbt_timer,
),
TP_fast_assign(
strncpy(__entry->name, dev_name(bdi->dev), 32);
strlcpy(__entry->name, dev_name(bdi->dev),
ARRAY_SIZE(__entry->name));
__entry->status = status;
__entry->step = step;
__entry->inflight = inflight;
......
......@@ -120,9 +120,11 @@ struct blk_zone_report {
};
/**
* struct blk_zone_range - BLKRESETZONE ioctl request
* @sector: starting sector of the first zone to issue reset write pointer
* @nr_sectors: Total number of sectors of 1 or more zones to reset
* struct blk_zone_range - BLKRESETZONE/BLKOPENZONE/
* BLKCLOSEZONE/BLKFINISHZONE ioctl
* requests
* @sector: Starting sector of the first zone to operate on.
* @nr_sectors: Total number of sectors of all zones to operate on.
*/
struct blk_zone_range {
__u64 sector;
......@@ -139,10 +141,19 @@ struct blk_zone_range {
* sector range. The sector range must be zone aligned.
* @BLKGETZONESZ: Get the device zone size in number of 512 B sectors.
* @BLKGETNRZONES: Get the total number of zones of the device.
* @BLKOPENZONE: Open the zones in the specified sector range.
* The 512 B sector range must be zone aligned.
* @BLKCLOSEZONE: Close the zones in the specified sector range.
* The 512 B sector range must be zone aligned.
* @BLKFINISHZONE: Mark the zones as full in the specified sector range.
* The 512 B sector range must be zone aligned.
*/
#define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report)
#define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range)
#define BLKGETZONESZ _IOR(0x12, 132, __u32)
#define BLKGETNRZONES _IOR(0x12, 133, __u32)
#define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range)
#define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range)
#define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range)
#endif /* _UAPI_BLKZONED_H */
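
From userspace the new ioctls take the same struct blk_zone_range as BLKRESETZONE. The hedged sketch below explicitly opens the first zone and later finishes it; it assumes a zoned block device at /dev/nullb0 (any zoned device works, provided its first zone is sequential) and a kernel and header set that provide these ioctls.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/blkzoned.h>

int main(void)
{
	int fd = open("/dev/nullb0", O_RDWR);	/* assumed zoned device */
	__u32 zone_sectors;
	struct blk_zone_range zr;

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Zone size in 512 B sectors, so the range is zone aligned. */
	if (ioctl(fd, BLKGETZONESZ, &zone_sectors) < 0) {
		perror("BLKGETZONESZ");
		return 1;
	}

	zr.sector = 0;			/* first zone */
	zr.nr_sectors = zone_sectors;	/* exactly one zone */

	if (ioctl(fd, BLKOPENZONE, &zr) < 0)	/* explicitly open it */
		perror("BLKOPENZONE");

	/* ... write sequentially into the zone ... */

	if (ioctl(fd, BLKFINISHZONE, &zr) < 0)	/* transition it to full */
		perror("BLKFINISHZONE");

	close(fd);
	return 0;
}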
......@@ -58,13 +58,20 @@
* Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
* used to clear any hints previously set.
*/
#define RWF_WRITE_LIFE_NOT_SET 0
#define RWH_WRITE_LIFE_NOT_SET 0
#define RWH_WRITE_LIFE_NONE 1
#define RWH_WRITE_LIFE_SHORT 2
#define RWH_WRITE_LIFE_MEDIUM 3
#define RWH_WRITE_LIFE_LONG 4
#define RWH_WRITE_LIFE_EXTREME 5
/*
* The originally introduced spelling is retained from the first
* versions of the patch set that introduced the feature, see commit
* v4.13-rc1~212^2~51.
*/
#define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET
/*
* Types of directory notifications that may be requested.
*/
......
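
These values are consumed through the F_SET_RW_HINT/F_GET_RW_HINT fcntl commands, which take a pointer to a 64-bit hint. A hedged userspace sketch, assuming a libc or kernel header set that exposes F_SET_RW_HINT and the RWH_* constants:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("scratch.dat", O_CREAT | O_WRONLY, 0644);
	uint64_t hint = RWH_WRITE_LIFE_SHORT;	/* data expected to be short-lived */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Attach the write-life hint to the open file. */
	if (fcntl(fd, F_SET_RW_HINT, &hint) < 0)
		perror("F_SET_RW_HINT");

	close(fd);
	return 0;
}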
......@@ -113,6 +113,25 @@ struct opal_shadow_mbr {
__u64 size;
};
/* Opal table operations */
enum opal_table_ops {
OPAL_READ_TABLE,
OPAL_WRITE_TABLE,
};
#define OPAL_UID_LENGTH 8
struct opal_read_write_table {
struct opal_key key;
const __u64 data;
const __u8 table_uid[OPAL_UID_LENGTH];
__u64 offset;
__u64 size;
#define OPAL_TABLE_READ (1 << OPAL_READ_TABLE)
#define OPAL_TABLE_WRITE (1 << OPAL_WRITE_TABLE)
__u64 flags;
__u64 priv;
};
#define IOC_OPAL_SAVE _IOW('p', 220, struct opal_lock_unlock)
#define IOC_OPAL_LOCK_UNLOCK _IOW('p', 221, struct opal_lock_unlock)
#define IOC_OPAL_TAKE_OWNERSHIP _IOW('p', 222, struct opal_key)
......@@ -128,5 +147,6 @@ struct opal_shadow_mbr {
#define IOC_OPAL_PSID_REVERT_TPR _IOW('p', 232, struct opal_key)
#define IOC_OPAL_MBR_DONE _IOW('p', 233, struct opal_mbr_done)
#define IOC_OPAL_WRITE_SHADOW_MBR _IOW('p', 234, struct opal_shadow_mbr)
#define IOC_OPAL_GENERIC_TABLE_RW _IOW('p', 235, struct opal_read_write_table)
#endif /* _UAPI_SED_OPAL_H */
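
IOC_OPAL_GENERIC_TABLE_RW drives the new generic table read/write path with the struct shown above. The sketch below only shows how the fields fit together: the device path and credential are placeholders, struct opal_key is assumed to carry the admin credential as in the existing Opal ioctls, and a real caller must fill table_uid with the 8-byte UID of the target table (for example the DataStore table added by this series); it is left zeroed here.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/sed-opal.h>

int main(void)
{
	char buf[64] = { 0 };			/* destination for the read */
	int fd = open("/dev/nvme0n1", O_RDWR);	/* assumed Opal-capable disk */
	struct opal_read_write_table rw = {
		/* .table_uid left zeroed: placeholder for the table's UID */
		.data   = (__u64)(uintptr_t)buf,	/* user buffer */
		.offset = 0,
		.size   = sizeof(buf),
		.flags  = OPAL_TABLE_READ,
	};

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Admin credential; opal_key layout as in the other Opal ioctls. */
	rw.key.key_len = strlen("password");
	memcpy(rw.key.key, "password", rw.key.key_len);

	if (ioctl(fd, IOC_OPAL_GENERIC_TABLE_RW, &rw) < 0)
		perror("IOC_OPAL_GENERIC_TABLE_RW");

	close(fd);
	return 0;
}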
......@@ -58,13 +58,20 @@
* Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
* used to clear any hints previously set.
*/
#define RWF_WRITE_LIFE_NOT_SET 0
#define RWH_WRITE_LIFE_NOT_SET 0
#define RWH_WRITE_LIFE_NONE 1
#define RWH_WRITE_LIFE_SHORT 2
#define RWH_WRITE_LIFE_MEDIUM 3
#define RWH_WRITE_LIFE_LONG 4
#define RWH_WRITE_LIFE_EXTREME 5
/*
* The originally introduced spelling is retained from the first
* versions of the patch set that introduced the feature, see commit
* v4.13-rc1~212^2~51.
*/
#define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET
/*
* Types of directory notifications that may be requested.
*/
......