Commit eb4a299b authored by Pavel Begunkov, committed by Jens Axboe

io_uring: cache struct io_notif

kmalloc'ing struct io_notif is too expensive when done frequently, so cache
them like many other resources in io_uring. Keep two lists: the first one
is where we get notifiers from, and it's protected by ->uring_lock.
The second is protected by ->completion_lock, and is where we queue released
notifiers. Then we splice one list into the other when needed.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/9dec18f7fcbab9f4bd40b96e5ae158b119945230.1657643355.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent eb42cebb
...@@ -249,6 +249,9 @@ struct io_ring_ctx { ...@@ -249,6 +249,9 @@ struct io_ring_ctx {
struct xarray io_bl_xa; struct xarray io_bl_xa;
struct list_head io_buffers_cache; struct list_head io_buffers_cache;
/* struct io_notif cache, protected by uring_lock */
struct list_head notif_list;
struct io_hash_table cancel_table_locked; struct io_hash_table cancel_table_locked;
struct list_head cq_overflow_list; struct list_head cq_overflow_list;
struct io_alloc_cache apoll_cache; struct io_alloc_cache apoll_cache;
...@@ -259,6 +262,10 @@ struct io_ring_ctx { ...@@ -259,6 +262,10 @@ struct io_ring_ctx {
struct io_wq_work_list locked_free_list; struct io_wq_work_list locked_free_list;
unsigned int locked_free_nr; unsigned int locked_free_nr;
/* struct io_notif cache protected by completion_lock */
struct list_head notif_list_locked;
unsigned int notif_locked_nr;
const struct cred *sq_creds; /* cred used for __io_sq_thread() */ const struct cred *sq_creds; /* cred used for __io_sq_thread() */
struct io_sq_data *sq_data; /* if using sq thread polling */ struct io_sq_data *sq_data; /* if using sq thread polling */
......
...@@ -321,6 +321,8 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) ...@@ -321,6 +321,8 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
INIT_WQ_LIST(&ctx->locked_free_list); INIT_WQ_LIST(&ctx->locked_free_list);
INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func);
INIT_WQ_LIST(&ctx->submit_state.compl_reqs); INIT_WQ_LIST(&ctx->submit_state.compl_reqs);
INIT_LIST_HEAD(&ctx->notif_list);
INIT_LIST_HEAD(&ctx->notif_list_locked);
return ctx; return ctx;
err: err:
kfree(ctx->dummy_ubuf); kfree(ctx->dummy_ubuf);
...@@ -2493,6 +2495,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) ...@@ -2493,6 +2495,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
WARN_ON_ONCE(ctx->notif_slots || ctx->nr_notif_slots); WARN_ON_ONCE(ctx->notif_slots || ctx->nr_notif_slots);
io_notif_cache_purge(ctx);
io_mem_free(ctx->rings); io_mem_free(ctx->rings);
io_mem_free(ctx->sq_sqes); io_mem_free(ctx->sq_sqes);
......
...@@ -15,10 +15,12 @@ static void __io_notif_complete_tw(struct callback_head *cb) ...@@ -15,10 +15,12 @@ static void __io_notif_complete_tw(struct callback_head *cb)
io_cq_lock(ctx); io_cq_lock(ctx);
io_fill_cqe_aux(ctx, notif->tag, 0, notif->seq, true); io_fill_cqe_aux(ctx, notif->tag, 0, notif->seq, true);
list_add(&notif->cache_node, &ctx->notif_list_locked);
ctx->notif_locked_nr++;
io_cq_unlock_post(ctx); io_cq_unlock_post(ctx);
percpu_ref_put(&ctx->refs); percpu_ref_put(&ctx->refs);
kfree(notif);
} }
static inline void io_notif_complete(struct io_notif *notif) static inline void io_notif_complete(struct io_notif *notif)
...@@ -45,21 +47,62 @@ static void io_uring_tx_zerocopy_callback(struct sk_buff *skb, ...@@ -45,21 +47,62 @@ static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
queue_work(system_unbound_wq, &notif->commit_work); queue_work(system_unbound_wq, &notif->commit_work);
} }
/*
 * Move every notifier queued on ctx->notif_list_locked onto
 * ctx->notif_list and reset the ctx->notif_locked_nr counter.
 *
 * Both the splice and the counter reset are done with
 * ctx->completion_lock held. The caller is annotated as having to
 * hold ctx->uring_lock.
 */
static void io_notif_splice_cached(struct io_ring_ctx *ctx)
__must_hold(&ctx->uring_lock)
{
spin_lock(&ctx->completion_lock);
/* list_splice_init() also reinitialises the now-emptied source list */
list_splice_init(&ctx->notif_list_locked, &ctx->notif_list);
/* nothing is left on the locked list, so restart the count from zero */
ctx->notif_locked_nr = 0;
spin_unlock(&ctx->completion_lock);
}
/*
 * Release every cached struct io_notif belonging to @ctx.
 *
 * Notifiers still sitting on the completion_lock-protected list are first
 * spliced over to ctx->notif_list, after which that list is drained one
 * entry at a time and each entry is kfree()'d.
 */
void io_notif_cache_purge(struct io_ring_ctx *ctx)
	__must_hold(&ctx->uring_lock)
{
	struct list_head *cache = &ctx->notif_list;
	struct io_notif *victim;

	/* gather everything onto the single list we are about to empty */
	io_notif_splice_cached(ctx);

	for (;;) {
		if (list_empty(cache))
			break;

		/* always pop from the front until nothing is left */
		victim = list_first_entry(cache, struct io_notif, cache_node);
		list_del(&victim->cache_node);
		kfree(victim);
	}
}
/*
 * Report whether ctx->notif_list has a cached notifier ready to be taken.
 *
 * A non-empty ctx->notif_list answers the question immediately. Otherwise
 * the locked list is spliced in, but only when ctx->notif_locked_nr has
 * grown past IO_NOTIF_SPLICE_BATCH. The counter is sampled here without
 * taking ctx->completion_lock, hence the data_race()/READ_ONCE() wrapping.
 *
 * Returns true if ctx->notif_list is non-empty on exit, false otherwise.
 */
static inline bool io_notif_has_cached(struct io_ring_ctx *ctx)
	__must_hold(&ctx->uring_lock)
{
	/* fast path: something is already available */
	if (likely(!list_empty(&ctx->notif_list)))
		return true;

	/* only pay for the splice once enough entries have piled up */
	if (data_race(READ_ONCE(ctx->notif_locked_nr) > IO_NOTIF_SPLICE_BATCH)) {
		io_notif_splice_cached(ctx);
		return !list_empty(&ctx->notif_list);
	}

	return false;
}
struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx, struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx,
struct io_notif_slot *slot) struct io_notif_slot *slot)
__must_hold(&ctx->uring_lock) __must_hold(&ctx->uring_lock)
{ {
struct io_notif *notif; struct io_notif *notif;
if (likely(io_notif_has_cached(ctx))) {
notif = list_first_entry(&ctx->notif_list,
struct io_notif, cache_node);
list_del(&notif->cache_node);
} else {
notif = kzalloc(sizeof(*notif), GFP_ATOMIC | __GFP_ACCOUNT); notif = kzalloc(sizeof(*notif), GFP_ATOMIC | __GFP_ACCOUNT);
if (!notif) if (!notif)
return NULL; return NULL;
/* pre-initialise some fields */
notif->seq = slot->seq++;
notif->tag = slot->tag;
notif->ctx = ctx; notif->ctx = ctx;
notif->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; notif->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
notif->uarg.callback = io_uring_tx_zerocopy_callback; notif->uarg.callback = io_uring_tx_zerocopy_callback;
}
notif->seq = slot->seq++;
notif->tag = slot->tag;
/* master ref owned by io_notif_slot, will be dropped on flush */ /* master ref owned by io_notif_slot, will be dropped on flush */
refcount_set(&notif->uarg.refcnt, 1); refcount_set(&notif->uarg.refcnt, 1);
percpu_ref_get(&ctx->refs); percpu_ref_get(&ctx->refs);
......
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
#include <net/sock.h> #include <net/sock.h>
#include <linux/nospec.h> #include <linux/nospec.h>
#define IO_NOTIF_SPLICE_BATCH 32
struct io_notif { struct io_notif {
struct ubuf_info uarg; struct ubuf_info uarg;
struct io_ring_ctx *ctx; struct io_ring_ctx *ctx;
...@@ -13,6 +15,8 @@ struct io_notif { ...@@ -13,6 +15,8 @@ struct io_notif {
u64 tag; u64 tag;
/* see struct io_notif_slot::seq */ /* see struct io_notif_slot::seq */
u32 seq; u32 seq;
/* hook into ctx->notif_list and ctx->notif_list_locked */
struct list_head cache_node;
union { union {
struct callback_head task_work; struct callback_head task_work;
...@@ -41,6 +45,7 @@ struct io_notif_slot { ...@@ -41,6 +45,7 @@ struct io_notif_slot {
}; };
int io_notif_unregister(struct io_ring_ctx *ctx); int io_notif_unregister(struct io_ring_ctx *ctx);
void io_notif_cache_purge(struct io_ring_ctx *ctx);
struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx, struct io_notif *io_alloc_notif(struct io_ring_ctx *ctx,
struct io_notif_slot *slot); struct io_notif_slot *slot);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment