Commit 6d9e6dfd authored by Christoph Hellwig, committed by Jens Axboe

ublk: defer disk allocation

Defer allocating the gendisk and request_queue until UBLK_CMD_START_DEV
is called.  This avoids funky life times where a disk is allocated
and then can be added and removed multiple times, which has never been
supported by the block layer.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220721130916.1869719-9-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent c50061f0
...@@ -112,7 +112,6 @@ struct ublk_queue { ...@@ -112,7 +112,6 @@ struct ublk_queue {
struct ublk_device { struct ublk_device {
struct gendisk *ub_disk; struct gendisk *ub_disk;
struct request_queue *ub_queue;
char *__queues; char *__queues;
...@@ -126,6 +125,7 @@ struct ublk_device { ...@@ -126,6 +125,7 @@ struct ublk_device {
struct device cdev_dev; struct device cdev_dev;
#define UB_STATE_OPEN (1 << 0) #define UB_STATE_OPEN (1 << 0)
#define UB_STATE_USED (1 << 1)
unsigned long state; unsigned long state;
int ub_number; int ub_number;
...@@ -156,8 +156,6 @@ static DEFINE_MUTEX(ublk_ctl_mutex); ...@@ -156,8 +156,6 @@ static DEFINE_MUTEX(ublk_ctl_mutex);
static struct miscdevice ublk_misc; static struct miscdevice ublk_misc;
static struct lock_class_key ublk_bio_compl_lkclass;
static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq) static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
{ {
if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) && if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) &&
...@@ -209,8 +207,17 @@ static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id) ...@@ -209,8 +207,17 @@ static inline int ublk_queue_cmd_buf_size(struct ublk_device *ub, int q_id)
PAGE_SIZE); PAGE_SIZE);
} }
static void ublk_free_disk(struct gendisk *disk)
{
struct ublk_device *ub = disk->private_data;
clear_bit(UB_STATE_USED, &ub->state);
put_device(&ub->cdev_dev);
}
static const struct block_device_operations ub_fops = { static const struct block_device_operations ub_fops = {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.free_disk = ublk_free_disk,
}; };
#define UBLK_MAX_PIN_PAGES 32 #define UBLK_MAX_PIN_PAGES 32
...@@ -801,13 +808,15 @@ static void ublk_cancel_dev(struct ublk_device *ub) ...@@ -801,13 +808,15 @@ static void ublk_cancel_dev(struct ublk_device *ub)
static void ublk_stop_dev(struct ublk_device *ub) static void ublk_stop_dev(struct ublk_device *ub)
{ {
mutex_lock(&ub->mutex); mutex_lock(&ub->mutex);
if (!disk_live(ub->ub_disk)) if (ub->dev_info.state != UBLK_S_DEV_LIVE)
goto unlock; goto unlock;
del_gendisk(ub->ub_disk); del_gendisk(ub->ub_disk);
ub->dev_info.state = UBLK_S_DEV_DEAD; ub->dev_info.state = UBLK_S_DEV_DEAD;
ub->dev_info.ublksrv_pid = -1; ub->dev_info.ublksrv_pid = -1;
ublk_cancel_dev(ub); ublk_cancel_dev(ub);
put_disk(ub->ub_disk);
ub->ub_disk = NULL;
unlock: unlock:
mutex_unlock(&ub->mutex); mutex_unlock(&ub->mutex);
cancel_delayed_work_sync(&ub->monitor_work); cancel_delayed_work_sync(&ub->monitor_work);
...@@ -1033,12 +1042,7 @@ static void ublk_cdev_rel(struct device *dev) ...@@ -1033,12 +1042,7 @@ static void ublk_cdev_rel(struct device *dev)
{ {
struct ublk_device *ub = container_of(dev, struct ublk_device, cdev_dev); struct ublk_device *ub = container_of(dev, struct ublk_device, cdev_dev);
blk_mq_destroy_queue(ub->ub_queue);
put_disk(ub->ub_disk);
blk_mq_free_tag_set(&ub->tag_set); blk_mq_free_tag_set(&ub->tag_set);
ublk_deinit_queues(ub); ublk_deinit_queues(ub);
__ublk_destroy_dev(ub); __ublk_destroy_dev(ub);
...@@ -1078,31 +1082,24 @@ static void ublk_stop_work_fn(struct work_struct *work) ...@@ -1078,31 +1082,24 @@ static void ublk_stop_work_fn(struct work_struct *work)
ublk_stop_dev(ub); ublk_stop_dev(ub);
} }
static void ublk_update_capacity(struct ublk_device *ub) /* align maximum I/O size to PAGE_SIZE */
static void ublk_align_max_io_size(struct ublk_device *ub)
{ {
unsigned int max_rq_bytes; unsigned int max_rq_bytes = ub->dev_info.rq_max_blocks << ub->bs_shift;
/* make max request buffer size aligned with PAGE_SIZE */ ub->dev_info.rq_max_blocks =
max_rq_bytes = round_down(ub->dev_info.rq_max_blocks << round_down(max_rq_bytes, PAGE_SIZE) >> ub->bs_shift;
ub->bs_shift, PAGE_SIZE);
ub->dev_info.rq_max_blocks = max_rq_bytes >> ub->bs_shift;
set_capacity(ub->ub_disk, ub->dev_info.dev_blocks << (ub->bs_shift - 9));
} }
/* add disk & cdev, cleanup everything in case of failure */ /* add tag_set & cdev, cleanup everything in case of failure */
static int ublk_add_dev(struct ublk_device *ub) static int ublk_add_dev(struct ublk_device *ub)
{ {
struct gendisk *disk;
int err = -ENOMEM; int err = -ENOMEM;
int bsize;
/* We are not ready to support zero copy */ /* We are not ready to support zero copy */
ub->dev_info.flags[0] &= ~UBLK_F_SUPPORT_ZERO_COPY; ub->dev_info.flags[0] &= ~UBLK_F_SUPPORT_ZERO_COPY;
bsize = ub->dev_info.block_size; ub->bs_shift = ilog2(ub->dev_info.block_size);
ub->bs_shift = ilog2(bsize);
ub->dev_info.nr_hw_queues = min_t(unsigned int, ub->dev_info.nr_hw_queues = min_t(unsigned int,
ub->dev_info.nr_hw_queues, nr_cpu_ids); ub->dev_info.nr_hw_queues, nr_cpu_ids);
...@@ -1119,59 +1116,16 @@ static int ublk_add_dev(struct ublk_device *ub) ...@@ -1119,59 +1116,16 @@ static int ublk_add_dev(struct ublk_device *ub)
ub->tag_set.cmd_size = sizeof(struct ublk_rq_data); ub->tag_set.cmd_size = sizeof(struct ublk_rq_data);
ub->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ub->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
ub->tag_set.driver_data = ub; ub->tag_set.driver_data = ub;
err = blk_mq_alloc_tag_set(&ub->tag_set); err = blk_mq_alloc_tag_set(&ub->tag_set);
if (err) if (err)
goto out_deinit_queues; goto out_deinit_queues;
ub->ub_queue = blk_mq_init_queue(&ub->tag_set); ublk_align_max_io_size(ub);
if (IS_ERR(ub->ub_queue)) {
err = PTR_ERR(ub->ub_queue);
goto out_cleanup_tags;
}
ub->ub_queue->queuedata = ub;
disk = ub->ub_disk = blk_mq_alloc_disk_for_queue(ub->ub_queue,
&ublk_bio_compl_lkclass);
if (!disk) {
err = -ENOMEM;
goto out_free_request_queue;
}
blk_queue_logical_block_size(ub->ub_queue, bsize);
blk_queue_physical_block_size(ub->ub_queue, bsize);
blk_queue_io_min(ub->ub_queue, bsize);
blk_queue_max_hw_sectors(ub->ub_queue, ub->dev_info.rq_max_blocks <<
(ub->bs_shift - 9));
ub->ub_queue->limits.discard_granularity = PAGE_SIZE;
blk_queue_max_discard_sectors(ub->ub_queue, UINT_MAX >> 9);
blk_queue_max_write_zeroes_sectors(ub->ub_queue, UINT_MAX >> 9);
ublk_update_capacity(ub);
disk->fops = &ub_fops;
disk->private_data = ub;
disk->queue = ub->ub_queue;
sprintf(disk->disk_name, "ublkb%d", ub->ub_number);
mutex_init(&ub->mutex); mutex_init(&ub->mutex);
/* add char dev so that ublksrv daemon can be setup */ /* add char dev so that ublksrv daemon can be setup */
err = ublk_add_chdev(ub); return ublk_add_chdev(ub);
if (err)
return err;
/* don't expose disk now until we got start command from cdev */
return 0;
out_free_request_queue:
blk_mq_destroy_queue(ub->ub_queue);
out_cleanup_tags:
blk_mq_free_tag_set(&ub->tag_set);
out_deinit_queues: out_deinit_queues:
ublk_deinit_queues(ub); ublk_deinit_queues(ub);
out_destroy_dev: out_destroy_dev:
...@@ -1209,6 +1163,7 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd) ...@@ -1209,6 +1163,7 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
int ublksrv_pid = (int)header->data[0]; int ublksrv_pid = (int)header->data[0];
unsigned long dev_blocks = header->data[1]; unsigned long dev_blocks = header->data[1];
struct ublk_device *ub; struct ublk_device *ub;
struct gendisk *disk;
int ret = -EINVAL; int ret = -EINVAL;
if (ublksrv_pid <= 0) if (ublksrv_pid <= 0)
...@@ -1223,21 +1178,45 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd) ...@@ -1223,21 +1178,45 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD); schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD);
mutex_lock(&ub->mutex); mutex_lock(&ub->mutex);
if (disk_live(ub->ub_disk)) { if (ub->dev_info.state == UBLK_S_DEV_LIVE ||
test_bit(UB_STATE_USED, &ub->state)) {
ret = -EEXIST; ret = -EEXIST;
goto out_unlock; goto out_unlock;
} }
/* We may get disk size updated */ /* We may get disk size updated */
if (dev_blocks) { if (dev_blocks)
ub->dev_info.dev_blocks = dev_blocks; ub->dev_info.dev_blocks = dev_blocks;
ublk_update_capacity(ub);
disk = blk_mq_alloc_disk(&ub->tag_set, ub);
if (IS_ERR(disk)) {
ret = PTR_ERR(disk);
goto out_unlock;
} }
sprintf(disk->disk_name, "ublkb%d", ub->ub_number);
disk->fops = &ub_fops;
disk->private_data = ub;
blk_queue_logical_block_size(disk->queue, ub->dev_info.block_size);
blk_queue_physical_block_size(disk->queue, ub->dev_info.block_size);
blk_queue_io_min(disk->queue, ub->dev_info.block_size);
blk_queue_max_hw_sectors(disk->queue,
ub->dev_info.rq_max_blocks << (ub->bs_shift - 9));
disk->queue->limits.discard_granularity = PAGE_SIZE;
blk_queue_max_discard_sectors(disk->queue, UINT_MAX >> 9);
blk_queue_max_write_zeroes_sectors(disk->queue, UINT_MAX >> 9);
set_capacity(disk, ub->dev_info.dev_blocks << (ub->bs_shift - 9));
ub->dev_info.ublksrv_pid = ublksrv_pid; ub->dev_info.ublksrv_pid = ublksrv_pid;
ret = add_disk(ub->ub_disk); ub->ub_disk = disk;
if (ret) get_device(&ub->cdev_dev);
ret = add_disk(disk);
if (ret) {
put_disk(disk);
goto out_unlock; goto out_unlock;
}
set_bit(UB_STATE_USED, &ub->state);
ub->dev_info.state = UBLK_S_DEV_LIVE; ub->dev_info.state = UBLK_S_DEV_LIVE;
out_unlock: out_unlock:
mutex_unlock(&ub->mutex); mutex_unlock(&ub->mutex);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment