Commit a9165b83 authored by Jens Axboe

io_uring/rw: always setup io_async_rw for read/write requests

read/write requests try to put everything on the stack, and then alloc
and copy if a retry is needed. This necessitates a bunch of nasty code
that deals with intermediate state.

Get rid of this, and have the prep side setup everything that is needed
upfront, which greatly simplifies the opcode handlers.

This includes adding an alloc cache for io_async_rw, to make it cheap
to handle.
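
As a rough sketch of what the prep side now does (the actual code is in io_uring/rw.c, whose diff is collapsed below): grab an io_async_rw from the per-ring cache if one is available, fall back to kmalloc() otherwise, and hang it off the request so the issue path always finds its async state in place. The helper name io_rw_alloc_async below is an assumption; the cache API mirrors the existing apoll/netmsg caches.

	/* Hedged sketch of prep-time allocation of the async state.  The
	 * 'cache' member is the io_cache_entry this patch adds to struct
	 * io_async_rw (see io_uring/rw.h below).
	 */
	static int io_rw_alloc_async(struct io_kiocb *req)
	{
		struct io_ring_ctx *ctx = req->ctx;
		struct io_cache_entry *entry;
		struct io_async_rw *rw;

		entry = io_alloc_cache_get(&ctx->rw_cache);
		if (entry) {
			rw = container_of(entry, struct io_async_rw, cache);
		} else {
			rw = kmalloc(sizeof(*rw), GFP_KERNEL);
			if (!rw)
				return -ENOMEM;
		}
		rw->free_iovec = NULL;
		rw->bytes_done = 0;
		req->async_data = rw;
		req->flags |= REQ_F_ASYNC_DATA;
		return 0;
	}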

In terms of cost, this should be basically free and transparent. For
the worst case of {READ,WRITE}_FIXED, which didn't need the async data
before, performance is unaffected in the normal peak workload used to
test it: it still runs at 122M IOPS.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent d80f9407
include/linux/io_uring_types.h
@@ -300,6 +300,7 @@ struct io_ring_ctx {
 	struct io_hash_table	cancel_table_locked;
 	struct io_alloc_cache	apoll_cache;
 	struct io_alloc_cache	netmsg_cache;
+	struct io_alloc_cache	rw_cache;
 
 	/*
 	 * Any cancelable uring_cmd is added to this list in
io_uring/io_uring.c
@@ -311,6 +311,8 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 			    sizeof(struct async_poll));
 	io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX,
 			    sizeof(struct io_async_msghdr));
+	io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX,
+			    sizeof(struct io_async_rw));
 	io_futex_cache_init(ctx);
 	init_completion(&ctx->ref_comp);
 	xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
@@ -2823,6 +2825,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	io_eventfd_unregister(ctx);
 	io_alloc_cache_free(&ctx->apoll_cache, io_apoll_cache_free);
 	io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
+	io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free);
 	io_futex_cache_free(ctx);
 	io_destroy_buffers(ctx);
 	mutex_unlock(&ctx->uring_lock);
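
io_rw_cache_free() is registered above but defined in io_uring/rw.c, whose diff is collapsed in this view. Mirroring how io_netmsg_cache_free() pairs with the netmsg cache, a plausible definition is simply:

	/* Sketch: free a cached io_async_rw at ring teardown.  The 'cache'
	 * member is the io_cache_entry embedded in io_async_rw by this patch.
	 */
	void io_rw_cache_free(struct io_cache_entry *entry)
	{
		struct io_async_rw *rw;

		rw = container_of(entry, struct io_async_rw, cache);
		kfree(rw);
	}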
io_uring/opdef.c
@@ -67,7 +67,7 @@ const struct io_issue_def io_issue_defs[] = {
 		.iopoll			= 1,
 		.iopoll_queue		= 1,
 		.vectored		= 1,
-		.prep			= io_prep_rwv,
+		.prep			= io_prep_readv,
 		.issue			= io_read,
 	},
 	[IORING_OP_WRITEV] = {
@@ -81,7 +81,7 @@ const struct io_issue_def io_issue_defs[] = {
 		.iopoll			= 1,
 		.iopoll_queue		= 1,
 		.vectored		= 1,
-		.prep			= io_prep_rwv,
+		.prep			= io_prep_writev,
 		.issue			= io_write,
 	},
 	[IORING_OP_FSYNC] = {
@@ -99,7 +99,7 @@ const struct io_issue_def io_issue_defs[] = {
 		.ioprio			= 1,
 		.iopoll			= 1,
 		.iopoll_queue		= 1,
-		.prep			= io_prep_rw_fixed,
+		.prep			= io_prep_read_fixed,
 		.issue			= io_read,
 	},
 	[IORING_OP_WRITE_FIXED] = {
@@ -112,7 +112,7 @@ const struct io_issue_def io_issue_defs[] = {
 		.ioprio			= 1,
 		.iopoll			= 1,
 		.iopoll_queue		= 1,
-		.prep			= io_prep_rw_fixed,
+		.prep			= io_prep_write_fixed,
 		.issue			= io_write,
 	},
 	[IORING_OP_POLL_ADD] = {
@@ -239,7 +239,7 @@ const struct io_issue_def io_issue_defs[] = {
 		.ioprio			= 1,
 		.iopoll			= 1,
 		.iopoll_queue		= 1,
-		.prep			= io_prep_rw,
+		.prep			= io_prep_read,
 		.issue			= io_read,
 	},
 	[IORING_OP_WRITE] = {
@@ -252,7 +252,7 @@ const struct io_issue_def io_issue_defs[] = {
 		.ioprio			= 1,
 		.iopoll			= 1,
 		.iopoll_queue		= 1,
-		.prep			= io_prep_rw,
+		.prep			= io_prep_write,
 		.issue			= io_write,
 	},
 	[IORING_OP_FADVISE] = {
@@ -490,14 +490,12 @@ const struct io_cold_def io_cold_defs[] = {
 	[IORING_OP_READV] = {
 		.async_size		= sizeof(struct io_async_rw),
 		.name			= "READV",
-		.prep_async		= io_readv_prep_async,
 		.cleanup		= io_readv_writev_cleanup,
 		.fail			= io_rw_fail,
 	},
 	[IORING_OP_WRITEV] = {
 		.async_size		= sizeof(struct io_async_rw),
 		.name			= "WRITEV",
-		.prep_async		= io_writev_prep_async,
 		.cleanup		= io_readv_writev_cleanup,
 		.fail			= io_rw_fail,
 	},
@@ -699,6 +697,7 @@ const struct io_cold_def io_cold_defs[] = {
 #endif
 	},
 	[IORING_OP_READ_MULTISHOT] = {
+		.async_size		= sizeof(struct io_async_rw),
 		.name			= "READ_MULTISHOT",
 	},
 	[IORING_OP_WAITID] = {
io_uring/rw.c
[diff collapsed in this view]
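
Based on the new prototypes in io_uring/rw.h below and the opdef.c changes above, the collapsed file plausibly splits the old combined prep handlers into per-direction entry points that import the iovec at prep time. A hedged sketch; __io_prep_rwv and io_import_iovec_async are assumed internal names, not confirmed by this view:

	/* Sketch: per-direction vectored prep.  ITER_DEST/ITER_SRC select the
	 * iov_iter direction; importing the iovec here, straight into the
	 * preallocated io_async_rw, is what removes the stack->heap copy on
	 * retry that the commit message describes.  io_prep_rw is the
	 * pre-existing shared sqe parser, now internal to rw.c.
	 */
	static int __io_prep_rwv(struct io_kiocb *req,
				 const struct io_uring_sqe *sqe, int ddir)
	{
		int ret;

		ret = io_prep_rw(req, sqe, ddir);	/* shared sqe parsing */
		if (unlikely(ret))
			return ret;
		return io_import_iovec_async(req, ddir);
	}

	int io_prep_readv(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	{
		return __io_prep_rwv(req, sqe, ITER_DEST);
	}

	int io_prep_writev(struct io_kiocb *req, const struct io_uring_sqe *sqe)
	{
		return __io_prep_rwv(req, sqe, ITER_SRC);
	}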
io_uring/rw.h
@@ -9,21 +9,26 @@ struct io_rw_state {
 };
 
 struct io_async_rw {
+	union {
+		size_t			bytes_done;
+		struct io_cache_entry	cache;
+	};
 	struct io_rw_state		s;
-	const struct iovec		*free_iovec;
-	size_t				bytes_done;
+	struct iovec			*free_iovec;
 	struct wait_page_queue		wpq;
 };
 
-int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe);
-int io_prep_rwv(struct io_kiocb *req, const struct io_uring_sqe *sqe);
-int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_prep_write_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_prep_readv(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_prep_writev(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_prep_write(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_read(struct io_kiocb *req, unsigned int issue_flags);
-int io_readv_prep_async(struct io_kiocb *req);
 int io_write(struct io_kiocb *req, unsigned int issue_flags);
-int io_writev_prep_async(struct io_kiocb *req);
 void io_readv_writev_cleanup(struct io_kiocb *req);
 void io_rw_fail(struct io_kiocb *req);
 void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts);
 int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags);
+void io_rw_cache_free(struct io_cache_entry *entry);
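
The union overlays the cache linkage on bytes_done: an entry sitting in the cache is never an in-flight request, so the two uses can't coexist and the struct doesn't grow. On the completion side, the async state can then be recycled roughly like this (a sketch; the name io_rw_recycle and the exact flag check are assumptions):

	/* Sketch: put the io_async_rw back in the per-ring cache when the
	 * ring lock is held; an unlocked (io-wq) completion can't touch the
	 * cache, so the data is left for the normal cleanup path instead.
	 */
	static void io_rw_recycle(struct io_kiocb *req, unsigned int issue_flags)
	{
		struct io_async_rw *rw = req->async_data;

		if (issue_flags & IO_URING_F_UNLOCKED)
			return;
		kfree(rw->free_iovec);	/* drop any heap-allocated iovec */
		rw->free_iovec = NULL;
		if (io_alloc_cache_put(&req->ctx->rw_cache, &rw->cache)) {
			req->async_data = NULL;
			req->flags &= ~REQ_F_ASYNC_DATA;
		}
	}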