Commit 5b9a7bb7 authored by Linus Torvalds

Merge tag 'for-6.4/io_uring-2023-04-21' of git://git.kernel.dk/linux

Pull io_uring updates from Jens Axboe:

 - Cleanup of the io-wq per-node mapping, notably getting rid of it so
   we just have a single io_wq entry per ring (Breno)

 - Followup to the above, move accounting to io_wq as well and
   completely drop struct io_wqe (Gabriel)

 - Enable KASAN for the internal io_uring caches (Breno)

 - Add support for multishot timeouts. Some applications use timeouts to
   wake someone waiting on completion entries, and this makes it a bit
   easier to just have a recurring timer rather than needing to rearm it
   every time (David)

 - Support archs that have shared cache coloring between userspace and
   the kernel, and hence have strict address requirements for mmap'ing
   the ring into userspace. This should only be parisc/hppa. (Helge, me)

 - XFS has supported O_DIRECT writes without needing to lock the inode
   exclusively for a long time, and ext4 now supports it as well. This
   is true for the common cases of not extending the file size. Flag the
   fs as having that feature, and utilize that to avoid serializing
   those writes in io_uring (me)

 - Enable completion batching for uring commands (me)

 - Revert patch adding io_uring restriction to what can be GUP mapped or
   not. This does not belong in io_uring, as io_uring isn't really
   special in this regard. Since this is also getting in the way of
   cleanups and improvements to the GUP code, get rid of it (me)

 - A few series greatly reducing the complexity of registered resources,
   like buffers or files. Not only does this clean up the code a lot,
   the simplified code is also a LOT more efficient (Pavel)

 - Series optimizing how we wait for events and run task_work related to
   it (Pavel)

 - Fixes for file/buffer unregistration with DEFER_TASKRUN (Pavel)

 - Misc cleanups and improvements (Pavel, me)

* tag 'for-6.4/io_uring-2023-04-21' of git://git.kernel.dk/linux: (71 commits)
  Revert "io_uring/rsrc: disallow multi-source reg buffers"
  io_uring: add support for multishot timeouts
  io_uring/rsrc: disassociate nodes and rsrc_data
  io_uring/rsrc: devirtualise rsrc put callbacks
  io_uring/rsrc: pass node to io_rsrc_put_work()
  io_uring/rsrc: inline io_rsrc_put_work()
  io_uring/rsrc: add empty flag in rsrc_node
  io_uring/rsrc: merge nodes and io_rsrc_put
  io_uring/rsrc: infer node from ctx on io_queue_rsrc_removal
  io_uring/rsrc: remove unused io_rsrc_node::llist
  io_uring/rsrc: refactor io_queue_rsrc_removal
  io_uring/rsrc: simplify single file node switching
  io_uring/rsrc: clean up __io_sqe_buffers_update()
  io_uring/rsrc: inline switch_start fast path
  io_uring/rsrc: remove rsrc_data refs
  io_uring/rsrc: fix DEFER_TASKRUN rsrc quiesce
  io_uring/rsrc: use wq for quiescing
  io_uring/rsrc: refactor io_rsrc_ref_quiesce
  io_uring/rsrc: remove io_rsrc_node::done
  io_uring/rsrc: use nospec'ed indexes
  ...
parents 5c7ecada 3c85cc43
@@ -899,7 +899,8 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
 			return ret;
 	}
-	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
+	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC |
+			FMODE_DIO_PARALLEL_WRITE;
 	return dquot_file_open(inode, filp);
 }
...
@@ -1171,7 +1171,8 @@ xfs_file_open(
 {
 	if (xfs_is_shutdown(XFS_M(inode->i_sb)))
 		return -EIO;
-	file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC;
+	file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC |
+			FMODE_DIO_PARALLEL_WRITE;
 	return generic_file_open(inode, file);
 }
...
@@ -168,6 +168,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define FMODE_NOREUSE		((__force fmode_t)0x800000)
 
+/* File supports non-exclusive O_DIRECT writes from multiple threads */
+#define FMODE_DIO_PARALLEL_WRITE	((__force fmode_t)0x1000000)
+
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x4000000)
...
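
With ext4 and XFS now setting FMODE_DIO_PARALLEL_WRITE, io_uring can issue non-extending O_DIRECT writes to the same file without serializing them on the inode lock. A minimal userspace sketch of the pattern this benefits, assuming liburing is available and that the file already covers the written range (buffers are leaked for brevity; this is an illustration, not part of the patch):

#define _GNU_SOURCE
#include <liburing.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define BLK 4096

/* Queue several O_DIRECT writes to one file in a single submit. */
int parallel_dio_writes(const char *path)
{
	struct io_uring ring;
	struct io_uring_cqe *cqe;
	int fd, i, ret;

	fd = open(path, O_WRONLY | O_DIRECT);
	if (fd < 0)
		return -1;
	if (io_uring_queue_init(8, &ring, 0) < 0)
		return -1;

	for (i = 0; i < 4; i++) {
		struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
		void *buf;

		/* O_DIRECT needs aligned buffers, lengths and offsets */
		if (posix_memalign(&buf, BLK, BLK))
			return -1;
		memset(buf, 0xaa, BLK);
		io_uring_prep_write(sqe, fd, buf, BLK, (__u64)i * BLK);
	}
	ret = io_uring_submit(&ring);	/* the writes may now run concurrently */

	for (i = 0; i < ret; i++) {
		if (io_uring_wait_cqe(&ring, &cqe))
			break;
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	close(fd);
	return 0;
}
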
@@ -188,8 +188,10 @@ struct io_ev_fd {
 };
 
 struct io_alloc_cache {
-	struct hlist_head	list;
+	struct io_wq_work_node	list;
 	unsigned int		nr_cached;
+	unsigned int		max_cached;
+	size_t			elem_size;
 };
 
 struct io_ring_ctx {
@@ -239,7 +241,6 @@ struct io_ring_ctx {
 	 * uring_lock, and updated through io_uring_register(2)
 	 */
 	struct io_rsrc_node		*rsrc_node;
-	int				rsrc_cached_refs;
 	atomic_t			cancel_seq;
 	struct io_file_table		file_table;
 	unsigned			nr_user_files;
@@ -295,7 +296,7 @@ struct io_ring_ctx {
 	spinlock_t		completion_lock;
 
 	bool			poll_multi_queue;
-	bool			cq_waiting;
+	atomic_t		cq_wait_nr;
 
 	/*
 	 * ->iopoll_list is protected by the ctx->uring_lock for
@@ -325,16 +326,15 @@ struct io_ring_ctx {
 	struct io_restriction		restrictions;
 
 	/* slow path rsrc auxiliary data, used by update/register */
-	struct io_rsrc_node		*rsrc_backup_node;
 	struct io_mapped_ubuf		*dummy_ubuf;
 	struct io_rsrc_data		*file_data;
 	struct io_rsrc_data		*buf_data;
-	struct delayed_work		rsrc_put_work;
-	struct callback_head		rsrc_put_tw;
-	struct llist_head		rsrc_put_llist;
+	/* protected by ->uring_lock */
 	struct list_head		rsrc_ref_list;
-	spinlock_t			rsrc_ref_lock;
+	struct io_alloc_cache		rsrc_node_cache;
+	struct wait_queue_head		rsrc_quiesce_wq;
+	unsigned			rsrc_quiesce;
 
 	struct list_head		io_buffers_pages;
@@ -366,6 +366,11 @@ struct io_ring_ctx {
 	unsigned			evfd_last_cq_tail;
 };
 
+struct io_tw_state {
+	/* ->uring_lock is taken, callbacks can use io_tw_lock to lock it */
+	bool locked;
+};
+
 enum {
 	REQ_F_FIXED_FILE_BIT	= IOSQE_FIXED_FILE_BIT,
 	REQ_F_IO_DRAIN_BIT	= IOSQE_IO_DRAIN_BIT,
@@ -472,7 +477,7 @@ enum {
 	REQ_F_HASH_LOCKED	= BIT(REQ_F_HASH_LOCKED_BIT),
 };
 
-typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked);
+typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
 
 struct io_task_work {
 	struct llist_node		node;
@@ -562,6 +567,7 @@ struct io_kiocb {
 	atomic_t			refs;
 	atomic_t			poll_refs;
 	struct io_task_work		io_task_work;
+	unsigned			nr_tw;
 	/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
 	union {
 		struct hlist_node	hash_node;
...
@@ -360,19 +360,18 @@ TRACE_EVENT(io_uring_complete,
 );
 
 /**
- * io_uring_submit_sqe - called before submitting one SQE
+ * io_uring_submit_req - called before submitting a request
  *
  * @req:		pointer to a submitted request
- * @force_nonblock:	whether a context blocking or not
  *
  * Allows to track SQE submitting, to understand what was the source of it, SQ
  * thread or io_uring_enter call.
  */
-TRACE_EVENT(io_uring_submit_sqe,
+TRACE_EVENT(io_uring_submit_req,
 
-	TP_PROTO(struct io_kiocb *req, bool force_nonblock),
+	TP_PROTO(struct io_kiocb *req),
 
-	TP_ARGS(req, force_nonblock),
+	TP_ARGS(req),
 
 	TP_STRUCT__entry (
 		__field(  void *,		ctx		)
@@ -380,7 +379,6 @@ TRACE_EVENT(io_uring_submit_sqe,
 		__field(  unsigned long long,	user_data	)
 		__field(  u8,			opcode		)
 		__field(  u32,			flags		)
-		__field(  bool,			force_nonblock	)
 		__field(  bool,			sq_thread	)
 
 		__string( op_str, io_uring_get_opcode(req->opcode) )
@@ -392,16 +390,15 @@ TRACE_EVENT(io_uring_submit_sqe,
 		__entry->user_data	= req->cqe.user_data;
 		__entry->opcode		= req->opcode;
 		__entry->flags		= req->flags;
-		__entry->force_nonblock	= force_nonblock;
 		__entry->sq_thread	= req->ctx->flags & IORING_SETUP_SQPOLL;
 
 		__assign_str(op_str, io_uring_get_opcode(req->opcode));
 	),
 
 	TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, "
-		  "non block %d, sq_thread %d", __entry->ctx, __entry->req,
+		  "sq_thread %d", __entry->ctx, __entry->req,
 		  __entry->user_data, __get_str(op_str),
-		  __entry->flags, __entry->force_nonblock, __entry->sq_thread)
+		  __entry->flags, __entry->sq_thread)
 );
 
 /*
...
@@ -250,6 +250,7 @@ enum io_uring_op {
 #define IORING_TIMEOUT_REALTIME		(1U << 3)
 #define IORING_LINK_TIMEOUT_UPDATE	(1U << 4)
 #define IORING_TIMEOUT_ETIME_SUCCESS	(1U << 5)
+#define IORING_TIMEOUT_MULTISHOT	(1U << 6)
 #define IORING_TIMEOUT_CLOCK_MASK	(IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
 #define IORING_TIMEOUT_UPDATE_MASK	(IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
 /*
@@ -389,6 +390,9 @@ enum {
 #define IORING_OFF_SQ_RING		0ULL
 #define IORING_OFF_CQ_RING		0x8000000ULL
 #define IORING_OFF_SQES			0x10000000ULL
+#define IORING_OFF_PBUF_RING		0x80000000ULL
+#define IORING_OFF_PBUF_SHIFT		16
+#define IORING_OFF_MMAP_MASK		0xf8000000ULL
 
 /*
  * Filled with the offset for mmap(2)
@@ -568,19 +572,6 @@ struct io_uring_rsrc_update2 {
 	__u32 resv2;
 };
 
-struct io_uring_notification_slot {
-	__u64 tag;
-	__u64 resv[3];
-};
-
-struct io_uring_notification_register {
-	__u32 nr_slots;
-	__u32 resv;
-	__u64 resv2;
-	__u64 data;
-	__u64 resv3;
-};
-
 /* Skip updating fd indexes set to this value in the fd table */
 #define IORING_REGISTER_FILES_SKIP	(-2)
@@ -635,12 +626,26 @@ struct io_uring_buf_ring {
 	};
 };
 
+/*
+ * Flags for IORING_REGISTER_PBUF_RING.
+ *
+ * IOU_PBUF_RING_MMAP:	If set, kernel will allocate the memory for the ring.
+ *			The application must not set a ring_addr in struct
+ *			io_uring_buf_reg, instead it must subsequently call
+ *			mmap(2) with the offset set as:
+ *			IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT)
+ *			to get a virtual mapping for the ring.
+ */
+enum {
+	IOU_PBUF_RING_MMAP	= 1,
+};
+
 /* argument for IORING_(UN)REGISTER_PBUF_RING */
 struct io_uring_buf_reg {
 	__u64	ring_addr;
 	__u32	ring_entries;
 	__u16	bgid;
-	__u16	pad;
+	__u16	flags;
 	__u64	resv[3];
 };
...
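
To illustrate the new IOU_PBUF_RING_MMAP flow from userspace: the application registers a buffer ring with ring_addr left at zero, then mmap()s the kernel-allocated memory at the documented offset. A rough sketch against the raw register syscall, assuming the updated uapi header; liburing later grew helpers for this, and the wrapper below is illustrative only:

#include <linux/io_uring.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>

/* Register a kernel-allocated provided-buffer ring and map it. */
static struct io_uring_buf_ring *setup_mmap_buf_ring(int ring_fd,
						     unsigned entries,
						     unsigned short bgid)
{
	struct io_uring_buf_reg reg;
	size_t ring_size = entries * sizeof(struct io_uring_buf);
	void *ptr;

	memset(&reg, 0, sizeof(reg));
	reg.ring_entries = entries;	/* must be a power of 2 */
	reg.bgid = bgid;
	reg.flags = IOU_PBUF_RING_MMAP;	/* kernel allocates, ring_addr stays 0 */

	if (syscall(__NR_io_uring_register, ring_fd,
		    IORING_REGISTER_PBUF_RING, &reg, 1))
		return NULL;

	/* map at IORING_OFF_PBUF_RING | (bgid << IORING_OFF_PBUF_SHIFT) */
	ptr = mmap(NULL, ring_size, PROT_READ | PROT_WRITE,
		   MAP_SHARED | MAP_POPULATE, ring_fd,
		   IORING_OFF_PBUF_RING | ((__u64)bgid << IORING_OFF_PBUF_SHIFT));
	if (ptr == MAP_FAILED)
		return NULL;

	return ptr;
}

On architectures with cache-aliasing constraints (the parisc/hppa case called out in the merge message), letting the kernel pick the mapping this way is the intended path.
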
@@ -7,47 +7,60 @@
 #define IO_ALLOC_CACHE_MAX	512
 
 struct io_cache_entry {
-	struct hlist_node	node;
+	struct io_wq_work_node node;
 };
 
 static inline bool io_alloc_cache_put(struct io_alloc_cache *cache,
 				      struct io_cache_entry *entry)
 {
-	if (cache->nr_cached < IO_ALLOC_CACHE_MAX) {
+	if (cache->nr_cached < cache->max_cached) {
 		cache->nr_cached++;
-		hlist_add_head(&entry->node, &cache->list);
+		wq_stack_add_head(&entry->node, &cache->list);
+		/* KASAN poisons object */
+		kasan_slab_free_mempool(entry);
 		return true;
 	}
 	return false;
 }
 
+static inline bool io_alloc_cache_empty(struct io_alloc_cache *cache)
+{
+	return !cache->list.next;
+}
+
 static inline struct io_cache_entry *io_alloc_cache_get(struct io_alloc_cache *cache)
 {
-	if (!hlist_empty(&cache->list)) {
-		struct hlist_node *node = cache->list.first;
+	if (cache->list.next) {
+		struct io_cache_entry *entry;
 
-		hlist_del(node);
+		entry = container_of(cache->list.next, struct io_cache_entry, node);
+		kasan_unpoison_range(entry, cache->elem_size);
+		cache->list.next = cache->list.next->next;
 		cache->nr_cached--;
-		return container_of(node, struct io_cache_entry, node);
+		return entry;
 	}
 
 	return NULL;
 }
 
-static inline void io_alloc_cache_init(struct io_alloc_cache *cache)
+static inline void io_alloc_cache_init(struct io_alloc_cache *cache,
+				       unsigned max_nr, size_t size)
 {
-	INIT_HLIST_HEAD(&cache->list);
+	cache->list.next = NULL;
 	cache->nr_cached = 0;
+	cache->max_cached = max_nr;
+	cache->elem_size = size;
 }
 
 static inline void io_alloc_cache_free(struct io_alloc_cache *cache,
 					void (*free)(struct io_cache_entry *))
 {
-	while (!hlist_empty(&cache->list)) {
-		struct hlist_node *node = cache->list.first;
+	while (1) {
+		struct io_cache_entry *entry = io_alloc_cache_get(cache);
 
-		hlist_del(node);
-		free(container_of(node, struct io_cache_entry, node));
+		if (!entry)
+			break;
+
+		free(entry);
 	}
 
 	cache->nr_cached = 0;
 }
...
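
The reworked cache is an intrusive single-linked stack with KASAN poisoning of parked objects. A hypothetical usage sketch of the API above (the struct and helper names are made up for illustration and assume the usual kernel headers; they are not part of the patch):

/* An object type that wants to be recycled via the cache. */
struct my_cached_obj {
	struct io_cache_entry	cache;	/* embedded for container_of() */
	int			payload;
};

static void my_cached_obj_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct my_cached_obj, cache));
}

static void my_cache_example(struct io_alloc_cache *cache)
{
	struct io_cache_entry *entry;
	struct my_cached_obj *obj;

	/* setup: keep at most 32 objects; elem_size drives KASAN unpoisoning */
	io_alloc_cache_init(cache, 32, sizeof(struct my_cached_obj));

	/* alloc fast path: recycle from the cache, else fall back to kmalloc */
	entry = io_alloc_cache_get(cache);
	if (entry)
		obj = container_of(entry, struct my_cached_obj, cache);
	else
		obj = kmalloc(sizeof(*obj), GFP_KERNEL);

	/* free fast path: park in the cache unless it is already full */
	if (obj && !io_alloc_cache_put(cache, &obj->cache))
		kfree(obj);

	/* teardown: drop anything still parked */
	io_alloc_cache_free(cache, my_cached_obj_free);
}
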
@@ -64,7 +64,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
 				 u32 slot_index)
 	__must_hold(&req->ctx->uring_lock)
 {
-	bool needs_switch = false;
 	struct io_fixed_file *file_slot;
 	int ret;
 
@@ -81,18 +80,13 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
 	if (file_slot->file_ptr) {
 		struct file *old_file;
 
-		ret = io_rsrc_node_switch_start(ctx);
-		if (ret)
-			goto err;
-
 		old_file = (struct file *)(file_slot->file_ptr & FFS_MASK);
-		ret = io_queue_rsrc_removal(ctx->file_data, slot_index,
-					    ctx->rsrc_node, old_file);
+		ret = io_queue_rsrc_removal(ctx->file_data, slot_index, old_file);
 		if (ret)
-			goto err;
+			return ret;
 		file_slot->file_ptr = 0;
 		io_file_bitmap_clear(&ctx->file_table, slot_index);
-		needs_switch = true;
 	}
 
 	ret = io_scm_file_account(ctx, file);
@@ -101,9 +95,6 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
 		io_fixed_file_set(file_slot, file);
 		io_file_bitmap_set(&ctx->file_table, slot_index);
 	}
-err:
-	if (needs_switch)
-		io_rsrc_node_switch(ctx, ctx->file_data);
 	return ret;
 }
@@ -156,9 +147,6 @@ int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
 		return -ENXIO;
 	if (offset >= ctx->nr_user_files)
 		return -EINVAL;
-	ret = io_rsrc_node_switch_start(ctx);
-	if (ret)
-		return ret;
 
 	offset = array_index_nospec(offset, ctx->nr_user_files);
 	file_slot = io_fixed_file_slot(&ctx->file_table, offset);
@@ -166,13 +154,12 @@ int io_fixed_fd_remove(struct io_ring_ctx *ctx, unsigned int offset)
 		return -EBADF;
 
 	file = (struct file *)(file_slot->file_ptr & FFS_MASK);
-	ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file);
+	ret = io_queue_rsrc_removal(ctx->file_data, offset, file);
 	if (ret)
 		return ret;
 
 	file_slot->file_ptr = 0;
 	io_file_bitmap_clear(&ctx->file_table, offset);
-	io_rsrc_node_switch(ctx, ctx->file_data);
 	return 0;
 }
...
This diff is collapsed.
This diff is collapsed.
@@ -15,6 +15,20 @@
 #include <trace/events/io_uring.h>
 #endif
 
+enum {
+	/* don't use deferred task_work */
+	IOU_F_TWQ_FORCE_NORMAL			= 1,
+
+	/*
+	 * A hint to not wake right away but delay until there are enough of
+	 * tw's queued to match the number of CQEs the task is waiting for.
+	 *
+	 * Must not be used with requests generating more than one CQE.
+	 * It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
+	 */
+	IOU_F_TWQ_LAZY_WAKE			= 2,
+};
+
 enum {
 	IOU_OK				= 0,
 	IOU_ISSUE_SKIP_COMPLETE		= -EIOCBQUEUED,
@@ -48,20 +62,20 @@ static inline bool io_req_ffs_set(struct io_kiocb *req)
 	return req->flags & REQ_F_FIXED_FILE;
 }
 
-void __io_req_task_work_add(struct io_kiocb *req, bool allow_local);
+void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
 bool io_is_uring_fops(struct file *file);
 bool io_alloc_async_data(struct io_kiocb *req);
 void io_req_task_queue(struct io_kiocb *req);
-void io_queue_iowq(struct io_kiocb *req, bool *dont_use);
-void io_req_task_complete(struct io_kiocb *req, bool *locked);
+void io_queue_iowq(struct io_kiocb *req, struct io_tw_state *ts_dont_use);
+void io_req_task_complete(struct io_kiocb *req, struct io_tw_state *ts);
 void io_req_task_queue_fail(struct io_kiocb *req, int ret);
-void io_req_task_submit(struct io_kiocb *req, bool *locked);
+void io_req_task_submit(struct io_kiocb *req, struct io_tw_state *ts);
 void tctx_task_work(struct callback_head *cb);
 __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
 int io_uring_alloc_task_context(struct task_struct *task,
 				struct io_ring_ctx *ctx);
 
-int io_poll_issue(struct io_kiocb *req, bool *locked);
+int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts);
 int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr);
 int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin);
 void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node);
@@ -80,6 +94,8 @@ bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
 #define io_lockdep_assert_cq_locked(ctx)				\
 	do {								\
+		lockdep_assert(in_task());				\
+									\
 		if (ctx->flags & IORING_SETUP_IOPOLL) {			\
 			lockdep_assert_held(&ctx->uring_lock);		\
 		} else if (!ctx->task_complete) {			\
@@ -93,7 +109,7 @@ bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
 static inline void io_req_task_work_add(struct io_kiocb *req)
 {
-	__io_req_task_work_add(req, true);
+	__io_req_task_work_add(req, 0);
 }
 
 #define io_for_each_link(pos, head) \
@@ -228,8 +244,7 @@ static inline void io_poll_wq_wake(struct io_ring_ctx *ctx)
 			poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
 }
 
-/* requires smb_mb() prior, see wq_has_sleeper() */
-static inline void __io_cqring_wake(struct io_ring_ctx *ctx)
+static inline void io_cqring_wake(struct io_ring_ctx *ctx)
 {
 	/*
 	 * Trigger waitqueue handler on all waiters on our waitqueue. This
@@ -241,17 +256,11 @@ static inline void __io_cqring_wake(struct io_ring_ctx *ctx)
 	 * waitqueue handlers, we know we have a dependency between eventfd or
 	 * epoll and should terminate multishot poll at that point.
 	 */
-	if (waitqueue_active(&ctx->cq_wait))
+	if (wq_has_sleeper(&ctx->cq_wait))
 		__wake_up(&ctx->cq_wait, TASK_NORMAL, 0,
 				poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
 }
 
-static inline void io_cqring_wake(struct io_ring_ctx *ctx)
-{
-	smp_mb();
-	__io_cqring_wake(ctx);
-}
-
 static inline bool io_sqring_full(struct io_ring_ctx *ctx)
 {
 	struct io_rings *r = ctx->rings;
@@ -262,9 +271,11 @@ static inline bool io_sqring_full(struct io_ring_ctx *ctx)
 static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
 {
 	struct io_rings *rings = ctx->rings;
+	unsigned int entries;
 
 	/* make sure SQ entry isn't read before tail */
-	return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
+	entries = smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
+	return min(entries, ctx->sq_entries);
 }
 
 static inline int io_run_task_work(void)
@@ -299,11 +310,11 @@ static inline bool io_task_work_pending(struct io_ring_ctx *ctx)
 	return task_work_pending(current) || !wq_list_empty(&ctx->work_llist);
 }
 
-static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked)
+static inline void io_tw_lock(struct io_ring_ctx *ctx, struct io_tw_state *ts)
 {
-	if (!*locked) {
+	if (!ts->locked) {
 		mutex_lock(&ctx->uring_lock);
-		*locked = true;
+		ts->locked = true;
 	}
 }
...
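
The bool *locked convention for task_work callbacks is replaced throughout by struct io_tw_state. A hypothetical callback sketch showing the new shape (my_op_tw and my_op_queue_tw are illustrative, not from the patch); the queueing side may pass IOU_F_TWQ_LAZY_WAKE when the request produces exactly one CQE:

/* Task-work callback in the new io_tw_state style. */
static void my_op_tw(struct io_kiocb *req, struct io_tw_state *ts)
{
	/* grab ->uring_lock if the task_work runner didn't hand it to us */
	io_tw_lock(req->ctx, ts);

	/* ... locked per-ring work for req goes here ... */

	io_req_set_res(req, 0, 0);
	io_req_task_complete(req, ts);	/* ts->locked tells it how to complete */
}

/* Queueing side: defer completion to task context. */
static void my_op_queue_tw(struct io_kiocb *req)
{
	req->io_task_work.func = my_op_tw;
	/* single-CQE request, so the lazy-wake hint is allowed */
	__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
}
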
@@ -137,7 +137,8 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
 			return NULL;
 
 		head &= bl->mask;
-		if (head < IO_BUFFER_LIST_BUF_PER_PAGE) {
+		/* mmaped buffers are always contig */
+		if (bl->is_mmap || head < IO_BUFFER_LIST_BUF_PER_PAGE) {
 			buf = &br->bufs[head];
 		} else {
 			int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
@@ -179,7 +180,7 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
 	bl = io_buffer_get_list(ctx, req->buf_index);
 	if (likely(bl)) {
-		if (bl->buf_nr_pages)
+		if (bl->is_mapped)
 			ret = io_ring_buffer_select(req, len, bl, issue_flags);
 		else
 			ret = io_provided_buffer_select(req, len, bl);
@@ -214,17 +215,28 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
 	if (!nbufs)
 		return 0;
 
-	if (bl->buf_nr_pages) {
-		int j;
-
+	if (bl->is_mapped) {
 		i = bl->buf_ring->tail - bl->head;
-		for (j = 0; j < bl->buf_nr_pages; j++)
-			unpin_user_page(bl->buf_pages[j]);
-		kvfree(bl->buf_pages);
-		bl->buf_pages = NULL;
-		bl->buf_nr_pages = 0;
+		if (bl->is_mmap) {
+			struct page *page;
+
+			page = virt_to_head_page(bl->buf_ring);
+			if (put_page_testzero(page))
+				free_compound_page(page);
+			bl->buf_ring = NULL;
+			bl->is_mmap = 0;
+		} else if (bl->buf_nr_pages) {
+			int j;
+
+			for (j = 0; j < bl->buf_nr_pages; j++)
+				unpin_user_page(bl->buf_pages[j]);
+			kvfree(bl->buf_pages);
+			bl->buf_pages = NULL;
+			bl->buf_nr_pages = 0;
+		}
 		/* make sure it's seen as empty */
 		INIT_LIST_HEAD(&bl->buf_list);
+		bl->is_mapped = 0;
 		return i;
 	}
@@ -304,7 +316,7 @@ int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
 	if (bl) {
 		ret = -EINVAL;
 		/* can't use provide/remove buffers command on mapped buffers */
-		if (!bl->buf_nr_pages)
+		if (!bl->is_mapped)
 			ret = __io_remove_buffers(ctx, bl, p->nbufs);
 	}
 	io_ring_submit_unlock(ctx, issue_flags);
@@ -449,7 +461,7 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 		}
 	}
 	/* can't add buffers via this command for a mapped buffer ring */
-	if (bl->buf_nr_pages) {
+	if (bl->is_mapped) {
 		ret = -EINVAL;
 		goto err;
 	}
@@ -464,23 +476,87 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 	return IOU_OK;
 }
 
-int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
+static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
+			    struct io_buffer_list *bl)
 {
 	struct io_uring_buf_ring *br;
-	struct io_uring_buf_reg reg;
-	struct io_buffer_list *bl, *free_bl = NULL;
 	struct page **pages;
 	int nr_pages;
 
+	pages = io_pin_pages(reg->ring_addr,
+			     flex_array_size(br, bufs, reg->ring_entries),
+			     &nr_pages);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
+
+	br = page_address(pages[0]);
+#ifdef SHM_COLOUR
+	/*
+	 * On platforms that have specific aliasing requirements, SHM_COLOUR
+	 * is set and we must guarantee that the kernel and user side align
+	 * nicely. We cannot do that if IOU_PBUF_RING_MMAP isn't set and
+	 * the application mmap's the provided ring buffer. Fail the request
+	 * if we, by chance, don't end up with aligned addresses. The app
+	 * should use IOU_PBUF_RING_MMAP instead, and liburing will handle
+	 * this transparently.
+	 */
+	if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1)) {
+		int i;
+
+		for (i = 0; i < nr_pages; i++)
+			unpin_user_page(pages[i]);
+		return -EINVAL;
+	}
+#endif
+
+	bl->buf_pages = pages;
+	bl->buf_nr_pages = nr_pages;
+	bl->buf_ring = br;
+	bl->is_mapped = 1;
+	bl->is_mmap = 0;
+	return 0;
+}
+
+static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg,
+			      struct io_buffer_list *bl)
+{
+	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
+	size_t ring_size;
+	void *ptr;
+
+	ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);
+	ptr = (void *) __get_free_pages(gfp, get_order(ring_size));
+	if (!ptr)
+		return -ENOMEM;
+
+	bl->buf_ring = ptr;
+	bl->is_mapped = 1;
+	bl->is_mmap = 1;
+	return 0;
+}
+
+int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
+{
+	struct io_uring_buf_reg reg;
+	struct io_buffer_list *bl, *free_bl = NULL;
+	int ret;
+
 	if (copy_from_user(&reg, arg, sizeof(reg)))
 		return -EFAULT;
 
-	if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2])
+	if (reg.resv[0] || reg.resv[1] || reg.resv[2])
 		return -EINVAL;
-	if (!reg.ring_addr)
-		return -EFAULT;
-	if (reg.ring_addr & ~PAGE_MASK)
+	if (reg.flags & ~IOU_PBUF_RING_MMAP)
 		return -EINVAL;
+	if (!(reg.flags & IOU_PBUF_RING_MMAP)) {
+		if (!reg.ring_addr)
+			return -EFAULT;
+		if (reg.ring_addr & ~PAGE_MASK)
+			return -EINVAL;
+	} else {
+		if (reg.ring_addr)
+			return -EINVAL;
+	}
+
 	if (!is_power_of_2(reg.ring_entries))
 		return -EINVAL;
@@ -497,7 +573,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	bl = io_buffer_get_list(ctx, reg.bgid);
 	if (bl) {
 		/* if mapped buffer ring OR classic exists, don't allow */
-		if (bl->buf_nr_pages || !list_empty(&bl->buf_list))
+		if (bl->is_mapped || !list_empty(&bl->buf_list))
 			return -EEXIST;
 	} else {
 		free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
@@ -505,22 +581,21 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 			return -ENOMEM;
 	}
 
-	pages = io_pin_pages(reg.ring_addr,
-			     flex_array_size(br, bufs, reg.ring_entries),
-			     &nr_pages);
-	if (IS_ERR(pages)) {
-		kfree(free_bl);
-		return PTR_ERR(pages);
+	if (!(reg.flags & IOU_PBUF_RING_MMAP))
+		ret = io_pin_pbuf_ring(&reg, bl);
+	else
+		ret = io_alloc_pbuf_ring(&reg, bl);
+
+	if (!ret) {
+		bl->nr_entries = reg.ring_entries;
+		bl->mask = reg.ring_entries - 1;
+
+		io_buffer_add_list(ctx, bl, reg.bgid);
+		return 0;
 	}
 
-	br = page_address(pages[0]);
-	bl->buf_pages = pages;
-	bl->buf_nr_pages = nr_pages;
-	bl->nr_entries = reg.ring_entries;
-	bl->buf_ring = br;
-	bl->mask = reg.ring_entries - 1;
-	io_buffer_add_list(ctx, bl, reg.bgid);
-	return 0;
+	kfree(free_bl);
+	return ret;
 }
@@ -530,13 +605,15 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	if (copy_from_user(&reg, arg, sizeof(reg)))
 		return -EFAULT;
-	if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2])
+	if (reg.resv[0] || reg.resv[1] || reg.resv[2])
+		return -EINVAL;
+	if (reg.flags)
 		return -EINVAL;
 
 	bl = io_buffer_get_list(ctx, reg.bgid);
 	if (!bl)
 		return -ENOENT;
-	if (!bl->buf_nr_pages)
+	if (!bl->is_mapped)
 		return -EINVAL;
 
 	__io_remove_buffers(ctx, bl, -1U);
@@ -546,3 +623,14 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	}
 	return 0;
 }
+
+void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
+{
+	struct io_buffer_list *bl;
+
+	bl = io_buffer_get_list(ctx, bgid);
+
+	if (!bl || !bl->is_mmap)
+		return NULL;
+
+	return bl->buf_ring;
+}
@@ -23,6 +23,11 @@ struct io_buffer_list {
 	__u16 nr_entries;
 	__u16 head;
 	__u16 mask;
+
+	/* ring mapped provided buffers */
+	__u8 is_mapped;
+	/* ring mapped provided buffers, but mmap'ed by application */
+	__u8 is_mmap;
 };
 
 struct io_buffer {
@@ -50,6 +55,8 @@ unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
 void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
 
+void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid);
+
 static inline void io_kbuf_recycle_ring(struct io_kiocb *req)
 {
 	/*
...
@@ -5,8 +5,8 @@
 
 #include "alloc_cache.h"
 
-#if defined(CONFIG_NET)
 struct io_async_msghdr {
+#if defined(CONFIG_NET)
 	union {
 		struct iovec		fast_iov[UIO_FASTIOV];
 		struct {
@@ -22,8 +22,11 @@ struct io_async_msghdr {
 	struct sockaddr __user		*uaddr;
 	struct msghdr			msg;
 	struct sockaddr_storage		addr;
+#endif
 };
 
+#if defined(CONFIG_NET)
+
 struct io_async_connect {
 	struct sockaddr_storage		address;
 };
...
@@ -9,7 +9,7 @@
 #include "notif.h"
 #include "rsrc.h"
 
-static void io_notif_complete_tw_ext(struct io_kiocb *notif, bool *locked)
+static void io_notif_complete_tw_ext(struct io_kiocb *notif, struct io_tw_state *ts)
 {
 	struct io_notif_data *nd = io_notif_to_data(notif);
 	struct io_ring_ctx *ctx = notif->ctx;
@@ -21,7 +21,7 @@ static void io_notif_complete_tw_ext(struct io_kiocb *notif, bool *locked)
 		__io_unaccount_mem(ctx->user, nd->account_pages);
 		nd->account_pages = 0;
 	}
-	io_req_task_complete(notif, locked);
+	io_req_task_complete(notif, ts);
 }
 
 static void io_tx_ubuf_callback(struct sk_buff *skb, struct ubuf_info *uarg,
@@ -31,7 +31,7 @@ static void io_tx_ubuf_callback(struct sk_buff *skb, struct ubuf_info *uarg,
 	struct io_kiocb *notif = cmd_to_io_kiocb(nd);
 
 	if (refcount_dec_and_test(&uarg->refcnt))
-		io_req_task_work_add(notif);
+		__io_req_task_work_add(notif, IOU_F_TWQ_LAZY_WAKE);
 }
 
 static void io_tx_ubuf_callback_ext(struct sk_buff *skb, struct ubuf_info *uarg,
@@ -79,7 +79,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
 	notif->io_task_work.func = io_req_task_complete;
 
 	nd = io_notif_to_data(notif);
-	nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
+	nd->uarg.flags = IO_NOTIF_UBUF_FLAGS;
 	nd->uarg.callback = io_tx_ubuf_callback;
 	refcount_set(&nd->uarg.refcnt, 1);
 	return notif;
...
@@ -7,6 +7,7 @@
 
 #include "rsrc.h"
 
+#define IO_NOTIF_UBUF_FLAGS	(SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN)
 #define IO_NOTIF_SPLICE_BATCH	32
 
 struct io_notif_data {
@@ -33,7 +34,7 @@ static inline void io_notif_flush(struct io_kiocb *notif)
 	/* drop slot's master ref */
 	if (refcount_dec_and_test(&nd->uarg.refcnt))
-		io_req_task_work_add(notif);
+		__io_req_task_work_add(notif, IOU_F_TWQ_LAZY_WAKE);
 }
 
 static inline int io_notif_account_mem(struct io_kiocb *notif, unsigned len)
...
@@ -148,7 +148,7 @@ static void io_poll_req_insert_locked(struct io_kiocb *req)
 	hlist_add_head(&req->hash_node, &table->hbs[index].list);
 }
 
-static void io_poll_tw_hash_eject(struct io_kiocb *req, bool *locked)
+static void io_poll_tw_hash_eject(struct io_kiocb *req, struct io_tw_state *ts)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 
@@ -159,7 +159,7 @@ static void io_poll_tw_hash_eject(struct io_kiocb *req, struct io_tw_state *ts)
 		 * already grabbed the mutex for us, but there is a chance it
 		 * failed.
 		 */
-		io_tw_lock(ctx, locked);
+		io_tw_lock(ctx, ts);
 		hash_del(&req->hash_node);
 		req->flags &= ~REQ_F_HASH_LOCKED;
 	} else {
@@ -238,7 +238,7 @@ enum {
  * req->cqe.res. IOU_POLL_REMOVE_POLL_USE_RES indicates to remove multishot
  * poll and that the result is stored in req->cqe.
  */
-static int io_poll_check_events(struct io_kiocb *req, bool *locked)
+static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
 {
 	int v;
 
@@ -300,13 +300,13 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
 			__poll_t mask = mangle_poll(req->cqe.res &
 						    req->apoll_events);
 
-			if (!io_aux_cqe(req->ctx, *locked, req->cqe.user_data,
+			if (!io_aux_cqe(req->ctx, ts->locked, req->cqe.user_data,
 					mask, IORING_CQE_F_MORE, false)) {
 				io_req_set_res(req, mask, 0);
 				return IOU_POLL_REMOVE_POLL_USE_RES;
 			}
 		} else {
-			int ret = io_poll_issue(req, locked);
+			int ret = io_poll_issue(req, ts);
 
 			if (ret == IOU_STOP_MULTISHOT)
 				return IOU_POLL_REMOVE_POLL_USE_RES;
 			if (ret < 0)
@@ -326,15 +326,15 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
 	return IOU_POLL_NO_ACTION;
 }
 
-static void io_poll_task_func(struct io_kiocb *req, bool *locked)
+static void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
 {
 	int ret;
 
-	ret = io_poll_check_events(req, locked);
+	ret = io_poll_check_events(req, ts);
 	if (ret == IOU_POLL_NO_ACTION)
 		return;
 	io_poll_remove_entries(req);
-	io_poll_tw_hash_eject(req, locked);
+	io_poll_tw_hash_eject(req, ts);
 
 	if (req->opcode == IORING_OP_POLL_ADD) {
 		if (ret == IOU_POLL_DONE) {
@@ -343,7 +343,7 @@ static void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
 			poll = io_kiocb_to_cmd(req, struct io_poll);
 			req->cqe.res = mangle_poll(req->cqe.res & poll->events);
 		} else if (ret == IOU_POLL_REISSUE) {
-			io_req_task_submit(req, locked);
+			io_req_task_submit(req, ts);
 			return;
 		} else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) {
 			req->cqe.res = ret;
@@ -351,14 +351,14 @@ static void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
 		}
 
 		io_req_set_res(req, req->cqe.res, 0);
-		io_req_task_complete(req, locked);
+		io_req_task_complete(req, ts);
 	} else {
-		io_tw_lock(req->ctx, locked);
+		io_tw_lock(req->ctx, ts);
 
 		if (ret == IOU_POLL_REMOVE_POLL_USE_RES)
-			io_req_task_complete(req, locked);
+			io_req_task_complete(req, ts);
 		else if (ret == IOU_POLL_DONE || ret == IOU_POLL_REISSUE)
-			io_req_task_submit(req, locked);
+			io_req_task_submit(req, ts);
 		else
 			io_req_defer_failed(req, ret);
 	}
@@ -977,7 +977,7 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
 	struct io_hash_bucket *bucket;
 	struct io_kiocb *preq;
 	int ret2, ret = 0;
-	bool locked;
+	struct io_tw_state ts = {};
 
 	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket);
 	ret2 = io_poll_disarm(preq);
@@ -1027,8 +1027,8 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
 		req_set_fail(preq);
 		io_req_set_res(preq, -ECANCELED, 0);
-		locked = !(issue_flags & IO_URING_F_UNLOCKED);
-		io_req_task_complete(preq, &locked);
+		ts.locked = !(issue_flags & IO_URING_F_UNLOCKED);
+		io_req_task_complete(preq, &ts);
 	}
 out:
 	if (ret < 0) {
 		req_set_fail(req);
...
This diff is collapsed.
@@ -4,6 +4,10 @@
 
 #include <net/af_unix.h>
 
+#include "alloc_cache.h"
+
+#define IO_NODE_ALLOC_CACHE_MAX 32
+
 #define IO_RSRC_TAG_TABLE_SHIFT	(PAGE_SHIFT - 3)
 #define IO_RSRC_TAG_TABLE_MAX	(1U << IO_RSRC_TAG_TABLE_SHIFT)
 #define IO_RSRC_TAG_TABLE_MASK	(IO_RSRC_TAG_TABLE_MAX - 1)
@@ -14,7 +18,6 @@ enum {
 };
 
 struct io_rsrc_put {
-	struct list_head list;
 	u64 tag;
 	union {
 		void *rsrc;
@@ -30,19 +33,20 @@ struct io_rsrc_data {
 	u64				**tags;
 	unsigned int			nr;
-	rsrc_put_fn			*do_put;
-	atomic_t			refs;
-	struct completion		done;
+	u16				rsrc_type;
 	bool				quiesce;
 };
 
 struct io_rsrc_node {
-	struct percpu_ref		refs;
+	union {
+		struct io_cache_entry		cache;
+		struct io_ring_ctx		*ctx;
+	};
+	int				refs;
+	bool				empty;
+	u16				type;
 	struct list_head		node;
-	struct list_head		rsrc_list;
-	struct io_rsrc_data		*rsrc_data;
-	struct llist_node		llist;
-	bool				done;
+	struct io_rsrc_put		item;
 };
 
 struct io_mapped_ubuf {
@@ -54,16 +58,10 @@ struct io_mapped_ubuf {
 };
 
 void io_rsrc_put_tw(struct callback_head *cb);
-void io_rsrc_put_work(struct work_struct *work);
-void io_rsrc_refs_refill(struct io_ring_ctx *ctx);
-void io_wait_rsrc_data(struct io_rsrc_data *data);
-void io_rsrc_node_destroy(struct io_rsrc_node *ref_node);
-void io_rsrc_refs_drop(struct io_ring_ctx *ctx);
-int io_rsrc_node_switch_start(struct io_ring_ctx *ctx);
-int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx,
-			  struct io_rsrc_node *node, void *rsrc);
-void io_rsrc_node_switch(struct io_ring_ctx *ctx,
-			 struct io_rsrc_data *data_to_kill);
+void io_rsrc_node_ref_zero(struct io_rsrc_node *node);
+void io_rsrc_node_destroy(struct io_ring_ctx *ctx, struct io_rsrc_node *ref_node);
+struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx);
+int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, void *rsrc);
 
 int io_import_fixed(int ddir, struct iov_iter *iter,
 			   struct io_mapped_ubuf *imu,
@@ -107,36 +105,24 @@ int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
 int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
 			unsigned int size, unsigned int type);
 
-static inline void io_rsrc_put_node(struct io_rsrc_node *node, int nr)
+static inline void io_put_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
 {
-	percpu_ref_put_many(&node->refs, nr);
-}
+	lockdep_assert_held(&ctx->uring_lock);
 
-static inline void io_req_put_rsrc(struct io_kiocb *req)
-{
-	if (req->rsrc_node)
-		io_rsrc_put_node(req->rsrc_node, 1);
+	if (node && !--node->refs)
+		io_rsrc_node_ref_zero(node);
 }
 
 static inline void io_req_put_rsrc_locked(struct io_kiocb *req,
 					  struct io_ring_ctx *ctx)
-	__must_hold(&ctx->uring_lock)
 {
-	struct io_rsrc_node *node = req->rsrc_node;
-
-	if (node) {
-		if (node == ctx->rsrc_node)
-			ctx->rsrc_cached_refs++;
-		else
-			io_rsrc_put_node(node, 1);
-	}
+	io_put_rsrc_node(ctx, req->rsrc_node);
 }
 
-static inline void io_charge_rsrc_node(struct io_ring_ctx *ctx)
+static inline void io_charge_rsrc_node(struct io_ring_ctx *ctx,
+				       struct io_rsrc_node *node)
 {
-	ctx->rsrc_cached_refs--;
-	if (unlikely(ctx->rsrc_cached_refs < 0))
-		io_rsrc_refs_refill(ctx);
+	node->refs++;
 }
 
 static inline void io_req_set_rsrc_node(struct io_kiocb *req,
@@ -149,7 +135,7 @@ static inline void io_req_set_rsrc_node(struct io_kiocb *req,
 		lockdep_assert_held(&ctx->uring_lock);
 
 		req->rsrc_node = ctx->rsrc_node;
-		io_charge_rsrc_node(ctx);
+		io_charge_rsrc_node(ctx, ctx->rsrc_node);
 		io_ring_submit_unlock(ctx, issue_flags);
 	}
 }
@@ -162,6 +148,12 @@ static inline u64 *io_get_tag_slot(struct io_rsrc_data *data, unsigned int idx)
 	return &data->tags[table_idx][off];
 }
 
+static inline int io_rsrc_init(struct io_ring_ctx *ctx)
+{
+	ctx->rsrc_node = io_rsrc_node_alloc(ctx);
+	return ctx->rsrc_node ? 0 : -ENOMEM;
+}
+
 int io_files_update(struct io_kiocb *req, unsigned int issue_flags);
 int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
...
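
With the percpu_ref and the cached-refs batching gone, node lifetime is a plain integer manipulated only under ->uring_lock. A condensed sketch of the charge/put pairing that io_req_set_rsrc_node() and io_req_put_rsrc_locked() above implement (the wrapper function name is made up for illustration):

/* Sketch of the per-request node reference cycle, all under ->uring_lock. */
static void example_rsrc_node_cycle(struct io_ring_ctx *ctx, struct io_kiocb *req)
	__must_hold(&ctx->uring_lock)
{
	/* submission: the request pins whatever node is current */
	req->rsrc_node = ctx->rsrc_node;
	io_charge_rsrc_node(ctx, ctx->rsrc_node);	/* just node->refs++ */

	/* ... the request uses its registered file or buffer ... */

	/* completion: the last put hands the node to io_rsrc_node_ref_zero() */
	io_put_rsrc_node(ctx, req->rsrc_node);
}
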
@@ -283,16 +283,16 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
 	return res;
 }
 
-static void io_req_rw_complete(struct io_kiocb *req, bool *locked)
+static void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
 {
 	io_req_io_end(req);
 
 	if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) {
-		unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED;
+		unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
 
 		req->cqe.flags |= io_put_kbuf(req, issue_flags);
 	}
-	io_req_task_complete(req, locked);
+	io_req_task_complete(req, ts);
 }
 
 static void io_complete_rw(struct kiocb *kiocb, long res)
@@ -304,7 +304,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
 		return;
 	io_req_set_res(req, io_fixup_rw_res(req, res), 0);
 	req->io_task_work.func = io_req_rw_complete;
-	io_req_task_work_add(req);
+	__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
 }
 
 static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
...
@@ -17,6 +17,7 @@ struct io_timeout {
 	struct file			*file;
 	u32				off;
 	u32				target_seq;
+	u32				repeats;
 	struct list_head		list;
 	/* head of the link, used by linked timeouts only */
 	struct io_kiocb			*head;
@@ -37,8 +38,9 @@ struct io_timeout_rem {
 static inline bool io_is_timeout_noseq(struct io_kiocb *req)
 {
 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
+	struct io_timeout_data *data = req->async_data;
 
-	return !timeout->off;
+	return !timeout->off || data->flags & IORING_TIMEOUT_MULTISHOT;
 }
 
 static inline void io_put_req(struct io_kiocb *req)
@@ -49,6 +51,44 @@ static inline void io_put_req(struct io_kiocb *req)
 	}
 }
 
+static inline bool io_timeout_finish(struct io_timeout *timeout,
+				     struct io_timeout_data *data)
+{
+	if (!(data->flags & IORING_TIMEOUT_MULTISHOT))
+		return true;
+
+	if (!timeout->off || (timeout->repeats && --timeout->repeats))
+		return false;
+
+	return true;
+}
+
+static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer);
+
+static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
+{
+	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
+	struct io_timeout_data *data = req->async_data;
+	struct io_ring_ctx *ctx = req->ctx;
+
+	if (!io_timeout_finish(timeout, data)) {
+		bool filled;
+		filled = io_aux_cqe(ctx, ts->locked, req->cqe.user_data, -ETIME,
+				    IORING_CQE_F_MORE, false);
+		if (filled) {
+			/* re-arm timer */
+			spin_lock_irq(&ctx->timeout_lock);
+			list_add(&timeout->list, ctx->timeout_list.prev);
+			data->timer.function = io_timeout_fn;
+			hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
+			spin_unlock_irq(&ctx->timeout_lock);
+			return;
+		}
+	}
+
+	io_req_task_complete(req, ts);
+}
+
 static bool io_kill_timeout(struct io_kiocb *req, int status)
 	__must_hold(&req->ctx->timeout_lock)
 {
@@ -101,9 +141,9 @@ __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
 	spin_unlock_irq(&ctx->timeout_lock);
 }
 
-static void io_req_tw_fail_links(struct io_kiocb *link, bool *locked)
+static void io_req_tw_fail_links(struct io_kiocb *link, struct io_tw_state *ts)
 {
-	io_tw_lock(link->ctx, locked);
+	io_tw_lock(link->ctx, ts);
 	while (link) {
 		struct io_kiocb *nxt = link->link;
 		long res = -ECANCELED;
@@ -112,7 +152,7 @@ static void io_req_tw_fail_links(struct io_kiocb *link, struct io_tw_state *ts)
 			res = link->cqe.res;
 		link->link = NULL;
 		io_req_set_res(link, res, 0);
-		io_req_task_complete(link, locked);
+		io_req_task_complete(link, ts);
 		link = nxt;
 	}
 }
@@ -212,7 +252,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
 		req_set_fail(req);
 
 	io_req_set_res(req, -ETIME, 0);
-	req->io_task_work.func = io_req_task_complete;
+	req->io_task_work.func = io_timeout_complete;
 	io_req_task_work_add(req);
 	return HRTIMER_NORESTART;
 }
@@ -265,9 +305,9 @@ int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
 	return 0;
 }
 
-static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked)
+static void io_req_task_link_timeout(struct io_kiocb *req, struct io_tw_state *ts)
 {
-	unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED;
+	unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
 	struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
 	struct io_kiocb *prev = timeout->prev;
 	int ret = -ENOENT;
@@ -282,11 +322,11 @@ static void io_req_task_link_timeout(struct io_kiocb *req, struct io_tw_state *ts)
 			ret = io_try_cancel(req->task->io_uring, &cd, issue_flags);
 		}
 		io_req_set_res(req, ret ?: -ETIME, 0);
-		io_req_task_complete(req, locked);
+		io_req_task_complete(req, ts);
 		io_put_req(prev);
 	} else {
 		io_req_set_res(req, -ETIME, 0);
-		io_req_task_complete(req, locked);
+		io_req_task_complete(req, ts);
 	}
 }
@@ -470,16 +510,27 @@ static int __io_timeout_prep(struct io_kiocb *req,
 		return -EINVAL;
 	flags = READ_ONCE(sqe->timeout_flags);
 	if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK |
-		      IORING_TIMEOUT_ETIME_SUCCESS))
+		      IORING_TIMEOUT_ETIME_SUCCESS |
+		      IORING_TIMEOUT_MULTISHOT))
 		return -EINVAL;
 	/* more than one clock specified is invalid, obviously */
 	if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1)
 		return -EINVAL;
+	/* multishot requests only make sense with rel values */
+	if (!(~flags & (IORING_TIMEOUT_MULTISHOT | IORING_TIMEOUT_ABS)))
+		return -EINVAL;
 
 	INIT_LIST_HEAD(&timeout->list);
 	timeout->off = off;
 	if (unlikely(off && !req->ctx->off_timeout_used))
 		req->ctx->off_timeout_used = true;
+	/*
+	 * for multishot reqs w/ fixed nr of repeats, repeats tracks the
+	 * remaining nr
+	 */
+	timeout->repeats = 0;
+	if ((flags & IORING_TIMEOUT_MULTISHOT) && off > 0)
+		timeout->repeats = off;
 
 	if (WARN_ON_ONCE(req_has_async_data(req)))
 		return -EFAULT;
...
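
From userspace, IORING_TIMEOUT_MULTISHOT turns a relative timeout into a recurring one: a count of N posts N CQEs (each with res -ETIME and IORING_CQE_F_MORE set while more are pending), and a count of 0 repeats until cancelled. A rough sketch using liburing's prep helper, assuming headers new enough to carry the flag:

#include <liburing.h>
#include <stdio.h>

/* Sketch: a timeout that fires every 100ms, five times. */
static int recurring_timeout(struct io_uring *ring)
{
	struct __kernel_timespec ts = { .tv_sec = 0, .tv_nsec = 100 * 1000 * 1000 };
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int i;

	/* must be relative: combining with IORING_TIMEOUT_ABS is rejected */
	io_uring_prep_timeout(sqe, &ts, 5, IORING_TIMEOUT_MULTISHOT);
	io_uring_submit(ring);

	for (i = 0; i < 5; i++) {
		if (io_uring_wait_cqe(ring, &cqe))
			return -1;
		/* each firing completes with -ETIME; F_MORE set until the last one */
		printf("tick %d, res %d, more %d\n", i, cqe->res,
		       !!(cqe->flags & IORING_CQE_F_MORE));
		io_uring_cqe_seen(ring, cqe);
	}
	return 0;
}
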
@@ -12,10 +12,10 @@
 #include "rsrc.h"
 #include "uring_cmd.h"
 
-static void io_uring_cmd_work(struct io_kiocb *req, bool *locked)
+static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
 {
 	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
-	unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED;
+	unsigned issue_flags = ts->locked ? 0 : IO_URING_F_UNLOCKED;
 
 	ioucmd->task_work_cb(ioucmd, issue_flags);
 }
@@ -54,11 +54,15 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2,
 	io_req_set_res(req, ret, 0);
 	if (req->ctx->flags & IORING_SETUP_CQE32)
 		io_req_set_cqe32_extra(req, res2, 0);
-	if (req->ctx->flags & IORING_SETUP_IOPOLL)
+	if (req->ctx->flags & IORING_SETUP_IOPOLL) {
 		/* order with io_iopoll_req_issued() checking ->iopoll_complete */
 		smp_store_release(&req->iopoll_completed, 1);
-	else
-		io_req_complete_post(req, issue_flags);
+	} else {
+		struct io_tw_state ts = {
+			.locked = !(issue_flags & IO_URING_F_UNLOCKED),
+		};
+		io_req_task_complete(req, &ts);
+	}
 }
 EXPORT_SYMBOL_GPL(io_uring_cmd_done);
@@ -73,6 +77,7 @@ int io_uring_cmd_prep_async(struct io_kiocb *req)
 	cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128);
 
 	memcpy(req->async_data, ioucmd->cmd, cmd_size);
+	ioucmd->cmd = req->async_data;
 	return 0;
 }
@@ -129,9 +134,6 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 		WRITE_ONCE(ioucmd->cookie, NULL);
 	}
 
-	if (req_has_async_data(req))
-		ioucmd->cmd = req->async_data;
-
 	ret = file->f_op->uring_cmd(ioucmd, issue_flags);
 	if (ret == -EAGAIN) {
 		if (!req_has_async_data(req)) {
...