Commit 9b8c5475 authored by Dylan Yudaken's avatar Dylan Yudaken Committed by Jens Axboe

io_uring: add io_aux_cqe which allows deferred completion

Use the just introduced deferred post cqe completion state when possible
in io_aux_cqe. If not possible fallback to io_post_aux_cqe.

This introduces a complication because of allow_overflow. For deferred
completions we cannot know without locking the completion_lock if it will
overflow (and even if we locked it, another post could sneak in and cause
this cqe to be in overflow).
However since overflow protection is mostly a best effort defence in depth
to prevent infinite loops of CQEs for poll, just checking the overflow bit
is going to be good enough and will result in at most 16 (array size of
deferred cqes) overflows.
Suggested-by: default avatarPavel Begunkov <asml.silence@gmail.com>
Signed-off-by: default avatarDylan Yudaken <dylany@meta.com>
Link: https://lore.kernel.org/r/20221124093559.3780686-6-dylany@meta.comSigned-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 931147dd
......@@ -830,6 +830,40 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx,
return filled;
}
bool io_aux_cqe(struct io_ring_ctx *ctx, bool defer, u64 user_data, s32 res, u32 cflags,
bool allow_overflow)
{
struct io_uring_cqe *cqe;
unsigned int length;
if (!defer)
return io_post_aux_cqe(ctx, user_data, res, cflags, allow_overflow);
length = ARRAY_SIZE(ctx->submit_state.cqes);
lockdep_assert_held(&ctx->uring_lock);
if (ctx->submit_state.cqes_count == length) {
io_cq_lock(ctx);
__io_flush_post_cqes(ctx);
/* no need to flush - flush is deferred */
spin_unlock(&ctx->completion_lock);
}
/* For defered completions this is not as strict as it is otherwise,
* however it's main job is to prevent unbounded posted completions,
* and in that it works just as well.
*/
if (!allow_overflow && test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq))
return false;
cqe = &ctx->submit_state.cqes[ctx->submit_state.cqes_count++];
cqe->user_data = user_data;
cqe->res = res;
cqe->flags = cflags;
return true;
}
static void __io_req_complete_post(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
......
......@@ -36,6 +36,8 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
bool allow_overflow);
bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags,
bool allow_overflow);
bool io_aux_cqe(struct io_ring_ctx *ctx, bool defer, u64 user_data, s32 res, u32 cflags,
bool allow_overflow);
void __io_commit_cqring_flush(struct io_ring_ctx *ctx);
static inline void io_req_complete_post_tw(struct io_kiocb *req, bool *locked)
......
......@@ -601,8 +601,8 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
}
if (!mshot_finished) {
if (io_post_aux_cqe(req->ctx, req->cqe.user_data, *ret,
cflags | IORING_CQE_F_MORE, true)) {
if (io_aux_cqe(req->ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
req->cqe.user_data, *ret, cflags | IORING_CQE_F_MORE, true)) {
io_recv_prep_retry(req);
return false;
}
......@@ -1320,7 +1320,8 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags)
if (ret < 0)
return ret;
if (io_post_aux_cqe(ctx, req->cqe.user_data, ret, IORING_CQE_F_MORE, true))
if (io_aux_cqe(ctx, issue_flags & IO_URING_F_COMPLETE_DEFER,
req->cqe.user_data, ret, IORING_CQE_F_MORE, true))
goto retry;
return -ECANCELED;
......
......@@ -252,8 +252,8 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked)
__poll_t mask = mangle_poll(req->cqe.res &
req->apoll_events);
if (!io_post_aux_cqe(ctx, req->cqe.user_data,
mask, IORING_CQE_F_MORE, false)) {
if (!io_aux_cqe(ctx, *locked, req->cqe.user_data,
mask, IORING_CQE_F_MORE, false)) {
io_req_set_res(req, mask, 0);
return IOU_POLL_REMOVE_POLL_USE_RES;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment