Commit 8dd03afe authored by Pavel Begunkov, committed by Jens Axboe

io_uring: refactor rsrc refnode allocation

There are two problems:
1) we always allocate refnodes in advance and free them if they
haven't been used. It's expensive: it takes two allocations, one of
them percpu, and it may be pretty common for the nodes to go unused.

2) the current API, where the caller allocates a refnode and sets some
of its fields, is error prone; we never want a file node running a
fixed buffer callback...

Solve both with a pre-init/get API. Pre-init just leaves the node
cached for later if it isn't used, and get (i.e. io_rsrc_refnode_get())
requires explicitly passing all the arguments that set the
callbacks etc., so it's more resilient.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent dd78f492
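
The pre-init/get split described above is easy to model on its own. What follows is a minimal userspace C sketch of the pattern, not the kernel code: the names (refnode_prealloc(), refnode_get(), struct ref_node) are simplified stand-ins, and a plain calloc() replaces the percpu-ref node allocation and locking.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx;
struct rsrc_put;	/* opaque here; only ever passed by pointer */

struct ref_node {
	void *rsrc_data;
	/* The put callback is set at get-time together with the data it
	 * operates on, so a "file" node can never be created with a
	 * "buffer" callback by mistake. */
	void (*rsrc_put)(struct ctx *ctx, struct rsrc_put *prsrc);
};

struct ctx {
	struct ref_node *backup_node;	/* filled by prealloc, consumed by get */
};

/* Pre-init: make sure a node is cached; a no-op if one already is.
 * This is the only step that can fail, so callers fail early, before
 * any state has been torn down. */
static int refnode_prealloc(struct ctx *ctx)
{
	if (ctx->backup_node)
		return 0;
	ctx->backup_node = calloc(1, sizeof(*ctx->backup_node));
	return ctx->backup_node ? 0 : -ENOMEM;
}

/* Get: consume the cached node, setting every behavioural field
 * explicitly. Must only be called after a successful prealloc (the
 * kernel version asserts this with WARN_ON_ONCE(!node)). */
static struct ref_node *
refnode_get(struct ctx *ctx, void *data,
	    void (*rsrc_put)(struct ctx *ctx, struct rsrc_put *prsrc))
{
	struct ref_node *node = ctx->backup_node;

	ctx->backup_node = NULL;
	node->rsrc_data = data;
	node->rsrc_put = rsrc_put;
	return node;
}

static void file_put(struct ctx *ctx, struct rsrc_put *prsrc)
{
	(void)ctx;
	(void)prsrc;
	puts("file put callback ran");
}

int main(void)
{
	struct ctx ctx = { 0 };
	struct ref_node *node;

	if (refnode_prealloc(&ctx))
		return 1;
	/* If the caller decides no switch is needed after all, the node
	 * simply stays cached for next time instead of being freed. */
	node = refnode_get(&ctx, /*data=*/NULL, file_put);
	node->rsrc_put(&ctx, NULL);
	free(node);
	return 0;
}

The point of the split shows in io_rsrc_ref_quiesce() in the diff below: the fallible allocation happens at the top of the retry loop, while the infallible get runs after percpu_ref_resurrect(), where an error exit would be awkward.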
@@ -442,6 +442,7 @@ struct io_ring_ctx {
 	struct llist_head	rsrc_put_llist;
 	struct list_head	rsrc_ref_list;
 	spinlock_t		rsrc_ref_lock;
+	struct fixed_rsrc_ref_node	*rsrc_backup_node;
 
 	struct io_restriction		restrictions;
@@ -7041,12 +7042,36 @@ static void io_sqe_rsrc_kill_node(struct io_ring_ctx *ctx, struct fixed_rsrc_data
 	percpu_ref_kill(&ref_node->refs);
 }
 
+static int io_rsrc_refnode_prealloc(struct io_ring_ctx *ctx)
+{
+	if (ctx->rsrc_backup_node)
+		return 0;
+	ctx->rsrc_backup_node = alloc_fixed_rsrc_ref_node(ctx);
+	return ctx->rsrc_backup_node ? 0 : -ENOMEM;
+}
+
+static struct fixed_rsrc_ref_node *
+io_rsrc_refnode_get(struct io_ring_ctx *ctx,
+		    struct fixed_rsrc_data *rsrc_data,
+		    void (*rsrc_put)(struct io_ring_ctx *ctx,
+				     struct io_rsrc_put *prsrc))
+{
+	struct fixed_rsrc_ref_node *node = ctx->rsrc_backup_node;
+
+	WARN_ON_ONCE(!node);
+	ctx->rsrc_backup_node = NULL;
+	node->rsrc_data = rsrc_data;
+	node->rsrc_put = rsrc_put;
+	return node;
+}
+
 static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data,
 			       struct io_ring_ctx *ctx,
 			       void (*rsrc_put)(struct io_ring_ctx *ctx,
 						struct io_rsrc_put *prsrc))
 {
-	struct fixed_rsrc_ref_node *backup_node;
+	struct fixed_rsrc_ref_node *node;
 	int ret;
 
 	if (data->quiesce)
@@ -7054,13 +7079,9 @@ static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data,
 	data->quiesce = true;
 
 	do {
-		ret = -ENOMEM;
-		backup_node = alloc_fixed_rsrc_ref_node(ctx);
-		if (!backup_node)
+		ret = io_rsrc_refnode_prealloc(ctx);
+		if (ret)
 			break;
-		backup_node->rsrc_data = data;
-		backup_node->rsrc_put = rsrc_put;
 
 		io_sqe_rsrc_kill_node(ctx, data);
 		percpu_ref_kill(&data->refs);
 		flush_delayed_work(&ctx->rsrc_put_work);
@@ -7070,17 +7091,16 @@ static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data,
 			break;
 
 		percpu_ref_resurrect(&data->refs);
-		io_sqe_rsrc_set_node(ctx, data, backup_node);
-		backup_node = NULL;
+		node = io_rsrc_refnode_get(ctx, data, rsrc_put);
+		io_sqe_rsrc_set_node(ctx, data, node);
 		reinit_completion(&data->done);
 
 		mutex_unlock(&ctx->uring_lock);
 		ret = io_run_task_work_sig();
 		mutex_lock(&ctx->uring_lock);
 	} while (ret >= 0);
 	data->quiesce = false;
 
-	if (backup_node)
-		destroy_fixed_rsrc_ref_node(backup_node);
 	return ret;
 }
@@ -7731,11 +7751,9 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 		return -EOVERFLOW;
 	if (done > ctx->nr_user_files)
 		return -EINVAL;
-
-	ref_node = alloc_fixed_rsrc_ref_node(ctx);
-	if (!ref_node)
-		return -ENOMEM;
-	init_fixed_file_ref_node(ctx, ref_node);
+	err = io_rsrc_refnode_prealloc(ctx);
+	if (err)
+		return err;
 
 	fds = u64_to_user_ptr(up->data);
 	for (done = 0; done < nr_args; done++) {
@@ -7789,10 +7807,9 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 	if (needs_switch) {
 		percpu_ref_kill(&data->node->refs);
+		ref_node = io_rsrc_refnode_get(ctx, data, io_ring_file_put);
 		io_sqe_rsrc_set_node(ctx, data, ref_node);
-	} else
-		destroy_fixed_rsrc_ref_node(ref_node);
+	}
 
 	return done ? done : err;
 }
@@ -8468,6 +8485,9 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	io_eventfd_unregister(ctx);
 	io_destroy_buffers(ctx);
 
+	if (ctx->rsrc_backup_node)
+		destroy_fixed_rsrc_ref_node(ctx->rsrc_backup_node);
+
 #if defined(CONFIG_UNIX)
 	if (ctx->ring_sock) {
 		ctx->ring_sock->file = NULL; /* so that iput() is called */