Commit 6bf9c47a authored by Jens Axboe

io_uring: defer file assignment

If an application uses direct open or accept, it knows in advance what
direct descriptor value it will get as it picks it itself. This allows
combined requests such as:

sqe = io_uring_get_sqe(ring);
io_uring_prep_openat_direct(sqe, ..., file_slot);
sqe->flags |= IOSQE_IO_LINK | IOSQE_CQE_SKIP_SUCCESS;

sqe = io_uring_get_sqe(ring);
io_uring_prep_read(sqe, file_slot, buf, buf_size, 0);
sqe->flags |= IOSQE_FIXED_FILE;

io_uring_submit(ring);

where we prepare both a file open and read, and only get a completion
event for the read when both have completed successfully.
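
As an illustration only (not part of this patch), here is a fuller liburing
sketch of the same chain. The path, table size, and buffer size are
placeholders; the direct descriptor table is registered up front with
sparse (-1) entries so the open can instantiate slot 0:

#include <liburing.h>
#include <fcntl.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int fds[4] = { -1, -1, -1, -1 };
	int file_slot = 0;
	char buf[4096];

	io_uring_queue_init(8, &ring, 0);
	/* sparse fixed-file table; the open below fills slot 0 */
	io_uring_register_files(&ring, fds, 4);

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_openat_direct(sqe, AT_FDCWD, "data.txt", O_RDONLY,
				    0, file_slot);
	sqe->flags |= IOSQE_IO_LINK | IOSQE_CQE_SKIP_SUCCESS;

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, file_slot, buf, sizeof(buf), 0);
	sqe->flags |= IOSQE_FIXED_FILE;

	io_uring_submit(&ring);

	/* on success this is the read's CQE; the open's was skipped */
	io_uring_wait_cqe(&ring, &cqe);
	printf("read: %d\n", cqe->res);
	io_uring_cqe_seen(&ring, cqe);

	io_uring_queue_exit(&ring);
	return 0;
}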

Currently links are fully prepared before the head is issued, but that
fails if the dependent link needs a file assigned that isn't valid until
the head has completed.

Conversely, if the same chain is performed but the fixed file slot is
already valid, then we would be unexpectedly returning data from the
old file slot rather than the newly opened one. Make sure we're
consistent here.

Allow deferral of file setup, which makes this documented case work.
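
For completeness, a sketch of the failure side as seen from userspace,
again illustrative rather than part of this patch and continuing from the
sketch above. IOSQE_CQE_SKIP_SUCCESS only elides successful completions,
so if the open fails its error CQE is still posted and the linked read
completes with -ECANCELED. Tagging user_data (the TAG_OPEN/TAG_READ values
here are made up) tells the two apart:

enum { TAG_OPEN = 1, TAG_READ = 2 };

/* at prep time: open sqe->user_data = TAG_OPEN, read sqe->user_data = TAG_READ */

struct io_uring_cqe *cqe;
int done = 0;

while (!done) {
	io_uring_wait_cqe(&ring, &cqe);
	done = (cqe->user_data == TAG_READ);
	if (cqe->user_data == TAG_OPEN)
		fprintf(stderr, "open failed: %d\n", cqe->res); /* only posted on error */
	else if (cqe->res == -ECANCELED)
		fprintf(stderr, "read cancelled by failed open\n");
	else
		printf("read %d bytes\n", cqe->res);
	io_uring_cqe_seen(&ring, cqe);
}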

Cc: stable@vger.kernel.org # v5.15+
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 5106dd6e
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -155,6 +155,7 @@ struct io_wq_work_node *wq_stack_extract(struct io_wq_work_node *stack)
 struct io_wq_work {
 	struct io_wq_work_node list;
 	unsigned flags;
+	int fd;
 };
 
 static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7240,6 +7240,23 @@ static void io_clean_op(struct io_kiocb *req)
 	req->flags &= ~IO_REQ_CLEAN_FLAGS;
 }
 
+static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags)
+{
+	if (req->file || !io_op_defs[req->opcode].needs_file)
+		return true;
+
+	if (req->flags & REQ_F_FIXED_FILE)
+		req->file = io_file_get_fixed(req, req->work.fd, issue_flags);
+	else
+		req->file = io_file_get_normal(req, req->work.fd);
+	if (req->file)
+		return true;
+
+	req_set_fail(req);
+	req->result = -EBADF;
+	return false;
+}
+
 static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 {
 	const struct cred *creds = NULL;
@@ -7250,6 +7267,8 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 	if (!io_op_defs[req->opcode].audit_skip)
 		audit_uring_entry(req->opcode);
 
+	if (unlikely(!io_assign_file(req, issue_flags)))
+		return -EBADF;
+
 	switch (req->opcode) {
 	case IORING_OP_NOP:
@@ -7394,10 +7413,11 @@ static struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
 static void io_wq_submit_work(struct io_wq_work *work)
 {
 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+	const struct io_op_def *def = &io_op_defs[req->opcode];
 	unsigned int issue_flags = IO_URING_F_UNLOCKED;
 	bool needs_poll = false;
 	struct io_kiocb *timeout;
-	int ret = 0;
+	int ret = 0, err = -ECANCELED;
 
 	/* one will be dropped by ->io_free_work() after returning to io-wq */
 	if (!(req->flags & REQ_F_REFCOUNT))
@@ -7409,14 +7429,18 @@ static void io_wq_submit_work(struct io_wq_work *work)
 	if (timeout)
 		io_queue_linked_timeout(timeout);
 
+	if (!io_assign_file(req, issue_flags)) {
+		err = -EBADF;
+		work->flags |= IO_WQ_WORK_CANCEL;
+	}
+
 	/* either cancelled or io-wq is dying, so don't touch tctx->iowq */
 	if (work->flags & IO_WQ_WORK_CANCEL) {
-		io_req_task_queue_fail(req, -ECANCELED);
+		io_req_task_queue_fail(req, err);
 		return;
 	}
 
 	if (req->flags & REQ_F_FORCE_ASYNC) {
-		const struct io_op_def *def = &io_op_defs[req->opcode];
 		bool opcode_poll = def->pollin || def->pollout;
 
 		if (opcode_poll && file_can_poll(req->file)) {
@@ -7749,6 +7773,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	if (io_op_defs[opcode].needs_file) {
 		struct io_submit_state *state = &ctx->submit_state;
 
+		req->work.fd = READ_ONCE(sqe->fd);
+
 		/*
 		 * Plug now if we have more than 2 IO left after this, and the
 		 * target is potentially a read/write to block based storage.
@@ -7758,13 +7784,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			state->need_plug = false;
 			blk_start_plug_nr_ios(&state->plug, state->submit_nr);
 		}
-
-		if (req->flags & REQ_F_FIXED_FILE)
-			req->file = io_file_get_fixed(req, READ_ONCE(sqe->fd), 0);
-		else
-			req->file = io_file_get_normal(req, READ_ONCE(sqe->fd));
-		if (unlikely(!req->file))
-			return -EBADF;
 	}
 
 	personality = READ_ONCE(sqe->personality);