Commit 4869f575 authored by Linus Torvalds

Merge tag 'block-6.1-2022-11-05' of git://git.kernel.dk/linux

Pull block fixes from Jens Axboe:

 - Fixes for the ublk driver (Ming)

 - Fixes for error handling memory leaks (Chen Jun, Chen Zhongjin)

 - Explicitly clear the last request in a chain when the plug is
   flushed, as it may have already been issued (Al)

* tag 'block-6.1-2022-11-05' of git://git.kernel.dk/linux:
  block: blk_add_rq_to_plug(): clear stale 'last' after flush
  blk-mq: Fix kmemleak in blk_mq_init_allocated_queue
  block: Fix possible memory leak for rq_wb on add_disk failure
  ublk_drv: add ublk_queue_cmd() for cleanup
  ublk_drv: avoid to touch io_uring cmd in blk_mq io path
  ublk_drv: comment on ublk_driver entry of Kconfig
  ublk_drv: return flag of UBLK_F_URING_CMD_COMP_IN_TASK in case of module
parents b208b9fb 878eb6e4
...@@ -1262,6 +1262,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) ...@@ -1262,6 +1262,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
(!blk_queue_nomerges(rq->q) && (!blk_queue_nomerges(rq->q) &&
blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) { blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
blk_mq_flush_plug_list(plug, false); blk_mq_flush_plug_list(plug, false);
last = NULL;
trace_block_plug(rq->q); trace_block_plug(rq->q);
} }
...@@ -4193,9 +4194,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, ...@@ -4193,9 +4194,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
return 0; return 0;
err_hctxs: err_hctxs:
xa_destroy(&q->hctx_table); blk_mq_release(q);
q->nr_hw_queues = 0;
blk_mq_sysfs_deinit(q);
err_poll: err_poll:
blk_stat_free_callback(q->poll_cb); blk_stat_free_callback(q->poll_cb);
q->poll_cb = NULL; q->poll_cb = NULL;
......
...@@ -527,6 +527,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, ...@@ -527,6 +527,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
bdi_unregister(disk->bdi); bdi_unregister(disk->bdi);
out_unregister_queue: out_unregister_queue:
blk_unregister_queue(disk); blk_unregister_queue(disk);
rq_qos_exit(disk->queue);
out_put_slave_dir: out_put_slave_dir:
kobject_put(disk->slave_dir); kobject_put(disk->slave_dir);
out_put_holder_dir: out_put_holder_dir:
......
...@@ -408,6 +408,12 @@ config BLK_DEV_UBLK ...@@ -408,6 +408,12 @@ config BLK_DEV_UBLK
definition isn't finalized yet, and might change according to future definition isn't finalized yet, and might change according to future
requirement, so mark it as experimental now. requirement, so mark it as experimental now.
Say Y if you want to get better performance, because task_work_add()
can then be used in the IO path in place of the io_uring cmd, which will
become shared between IO tasks and the ubq daemon. Meanwhile,
task_work_add() can handle batches more effectively, but it isn't
exported for modules, so ublk has to be built into the kernel.
source "drivers/block/rnbd/Kconfig" source "drivers/block/rnbd/Kconfig"
endif # BLK_DEV endif # BLK_DEV
...@@ -57,11 +57,14 @@ ...@@ -57,11 +57,14 @@
#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD) #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD)
struct ublk_rq_data { struct ublk_rq_data {
union {
struct callback_head work; struct callback_head work;
struct llist_node node;
};
}; };
struct ublk_uring_cmd_pdu { struct ublk_uring_cmd_pdu {
struct request *req; struct ublk_queue *ubq;
}; };
/* /*
...@@ -119,6 +122,8 @@ struct ublk_queue { ...@@ -119,6 +122,8 @@ struct ublk_queue {
struct task_struct *ubq_daemon; struct task_struct *ubq_daemon;
char *io_cmd_buf; char *io_cmd_buf;
struct llist_head io_cmds;
unsigned long io_addr; /* mapped vm address */ unsigned long io_addr; /* mapped vm address */
unsigned int max_io_sz; unsigned int max_io_sz;
bool force_abort; bool force_abort;
...@@ -764,8 +769,12 @@ static inline void __ublk_rq_task_work(struct request *req) ...@@ -764,8 +769,12 @@ static inline void __ublk_rq_task_work(struct request *req)
static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd) static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
{ {
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
struct ublk_queue *ubq = pdu->ubq;
struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
struct ublk_rq_data *data;
__ublk_rq_task_work(pdu->req); llist_for_each_entry(data, io_cmds, node)
__ublk_rq_task_work(blk_mq_rq_from_pdu(data));
} }
static void ublk_rq_task_work_fn(struct callback_head *work) static void ublk_rq_task_work_fn(struct callback_head *work)
...@@ -777,6 +786,54 @@ static void ublk_rq_task_work_fn(struct callback_head *work) ...@@ -777,6 +786,54 @@ static void ublk_rq_task_work_fn(struct callback_head *work)
__ublk_rq_task_work(req); __ublk_rq_task_work(req);
} }
/*
 * Kick processing of the requests queued on ubq->io_cmds, using the
 * io_uring command stored in the ublk_io slot indexed by rq->tag.
 */
static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq)
{
	struct ublk_io *io = &ubq->ios[rq->tag];
	struct ublk_uring_cmd_pdu *pdu;
	struct io_uring_cmd *ioucmd;

	/*
	 * UBLK_IO_FLAG_ABORTED means this is a re-issued request that
	 * monitor_work aborted earlier because the ubq_daemon (the cmd's
	 * task) is PF_EXITING. io_uring_cmd_complete_in_task() must not be
	 * called any more: the io_uring context of this ioucmd may already
	 * be freed if no inflight ioucmd exists, and using it may cause a
	 * null-deref in ctx->fallback_work.
	 *
	 * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends the request
	 * (releasing the tag); the request is then re-started (allocating
	 * the tag) before we get here. Releasing/allocating a tag implies
	 * smp_mb(), so observing UBLK_IO_FLAG_ABORTED guarantees that this
	 * is a re-issued, previously aborted request.
	 */
	if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) {
		struct llist_node *pending = llist_del_all(&ubq->io_cmds);
		struct ublk_rq_data *entry;

		/* Abort every request that was taken off the queue's list. */
		llist_for_each_entry(entry, pending, node)
			__ublk_abort_rq(ubq, blk_mq_rq_from_pdu(entry));
		return;
	}

	/* Record the queue in the cmd pdu so the callback can find it. */
	ioucmd = io->cmd;
	pdu = ublk_get_uring_cmd_pdu(ioucmd);
	pdu->ubq = ubq;
	io_uring_cmd_complete_in_task(ioucmd, ublk_rq_task_work_cb);
}
/*
 * Hand @rq over for processing, either through task_work on the
 * ubq_daemon or through the queue's io_cmds list plus an io_uring cmd.
 *
 * @last selects the task_work notify mode: TWA_SIGNAL_NO_IPI when set,
 * TWA_NONE otherwise.
 */
static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq,
		bool last)
{
	struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);

	if (!ublk_can_use_task_work(ubq)) {
		/*
		 * Submit only when llist_add() returns non-zero.
		 * NOTE(review): presumably that means the list was empty
		 * before this add, so one submission covers a whole batch -
		 * confirm against the llist API.
		 */
		if (llist_add(&data->node, &ubq->io_cmds))
			ublk_submit_cmd(ubq, rq);
		return;
	}

	/* A non-zero return from task_work_add() is treated as failure. */
	if (task_work_add(ubq->ubq_daemon, &data->work,
			  last ? TWA_SIGNAL_NO_IPI : TWA_NONE))
		__ublk_abort_rq(ubq, rq);
}
static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd) const struct blk_mq_queue_data *bd)
{ {
...@@ -788,6 +845,7 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -788,6 +845,7 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
res = ublk_setup_iod(ubq, rq); res = ublk_setup_iod(ubq, rq);
if (unlikely(res != BLK_STS_OK)) if (unlikely(res != BLK_STS_OK))
return BLK_STS_IOERR; return BLK_STS_IOERR;
/* With recovery feature enabled, force_abort is set in /* With recovery feature enabled, force_abort is set in
* ublk_stop_dev() before calling del_gendisk(). We have to * ublk_stop_dev() before calling del_gendisk(). We have to
* abort all requeued and new rqs here to let del_gendisk() * abort all requeued and new rqs here to let del_gendisk()
...@@ -803,41 +861,11 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -803,41 +861,11 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(bd->rq); blk_mq_start_request(bd->rq);
if (unlikely(ubq_daemon_is_dying(ubq))) { if (unlikely(ubq_daemon_is_dying(ubq))) {
fail:
__ublk_abort_rq(ubq, rq); __ublk_abort_rq(ubq, rq);
return BLK_STS_OK; return BLK_STS_OK;
} }
if (ublk_can_use_task_work(ubq)) { ublk_queue_cmd(ubq, rq, bd->last);
struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
enum task_work_notify_mode notify_mode = bd->last ?
TWA_SIGNAL_NO_IPI : TWA_NONE;
if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode))
goto fail;
} else {
struct ublk_io *io = &ubq->ios[rq->tag];
struct io_uring_cmd *cmd = io->cmd;
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
/*
* If the check pass, we know that this is a re-issued request aborted
* previously in monitor_work because the ubq_daemon(cmd's task) is
* PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore
* because this ioucmd's io_uring context may be freed now if no inflight
* ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work.
*
* Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing
* the tag). Then the request is re-started(allocating the tag) and we are here.
* Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED
* guarantees that here is a re-issued request aborted previously.
*/
if ((io->flags & UBLK_IO_FLAG_ABORTED))
goto fail;
pdu->req = rq;
io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
}
return BLK_STS_OK; return BLK_STS_OK;
} }
...@@ -1164,22 +1192,12 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq) ...@@ -1164,22 +1192,12 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
} }
static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id, static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
int tag, struct io_uring_cmd *cmd) int tag)
{ {
struct ublk_queue *ubq = ublk_get_queue(ub, q_id); struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag); struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
if (ublk_can_use_task_work(ubq)) { ublk_queue_cmd(ubq, req, true);
struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
/* should not fail since we call it just in ubq->ubq_daemon */
task_work_add(ubq->ubq_daemon, &data->work, TWA_SIGNAL_NO_IPI);
} else {
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
pdu->req = req;
io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
}
} }
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
...@@ -1267,7 +1285,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) ...@@ -1267,7 +1285,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
io->addr = ub_cmd->addr; io->addr = ub_cmd->addr;
io->cmd = cmd; io->cmd = cmd;
io->flags |= UBLK_IO_FLAG_ACTIVE; io->flags |= UBLK_IO_FLAG_ACTIVE;
ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag, cmd); ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag);
break; break;
default: default:
goto out; goto out;
...@@ -1658,6 +1676,9 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd) ...@@ -1658,6 +1676,9 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
*/ */
ub->dev_info.flags &= UBLK_F_ALL; ub->dev_info.flags &= UBLK_F_ALL;
if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK))
ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK;
/* We are not ready to support zero copy */ /* We are not ready to support zero copy */
ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY; ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment