Commit 625434da authored by Linus Torvalds

Merge tag 'for-5.13/io_uring-2021-04-27' of git://git.kernel.dk/linux-block

Pull io_uring updates from Jens Axboe:

 - Support for multi-shot mode for POLL requests

 - More efficient reference counting. This is shamelessly stolen from
   the mm side. Even though referencing is mostly single/dual user, the
   128 count was retained to keep the code the same. Maybe this
   should/could be made generic at some point.

 - Removal of the need to have a manager thread for each ring. The
   manager thread's only job was checking for and creating new io-threads
   as needed; we now handle this from the queueing path instead.

 - Allow SQPOLL without CAP_SYS_ADMIN or CAP_SYS_NICE. Since 5.12, this
   thread is "just" a regular application thread, so there is no need to
   restrict its use anymore.

 - Cleanup of how internal async poll data lifetime is managed.

 - Fix for a syzbot-reported crash on SQPOLL cancellation.

 - Make buffer registration work more like file registration, including
   the flexibility to avoid unregistering and re-registering the full set.

 - Fix for io-wq affinity setting.

 - Be a bit more defensive in task->pf_io_worker setup.

 - Various SQPOLL fixes.

 - Cleanup of SQPOLL creds handling.

 - Improvements to in-flight request tracking.

 - File registration cleanups.

 - Tons of cleanups and little fixes.

* tag 'for-5.13/io_uring-2021-04-27' of git://git.kernel.dk/linux-block: (156 commits)
  io_uring: maintain drain logic for multishot poll requests
  io_uring: Check current->io_uring in io_uring_cancel_sqpoll
  io_uring: fix NULL reg-buffer
  io_uring: simplify SQPOLL cancellations
  io_uring: fix work_exit sqpoll cancellations
  io_uring: Fix uninitialized variable up.resv
  io_uring: fix invalid error check after malloc
  io_uring: io_sq_thread() no longer needs to reset current->pf_io_worker
  kernel: always initialize task->pf_io_worker to NULL
  io_uring: update sq_thread_idle after ctx deleted
  io_uring: add full-fledged dynamic buffers support
  io_uring: implement fixed buffers registration similar to fixed files
  io_uring: prepare fixed rw for dynanic buffers
  io_uring: keep table of pointers to ubufs
  io_uring: add generic rsrc update with tags
  io_uring: add IORING_REGISTER_RSRC
  io_uring: enumerate dynamic resources
  io_uring: add generic path for rsrc update
  io_uring: preparation for rsrc tagging
  io_uring: decouple CQE filling from requests
  ...
parents c05a182b 7b289c38
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -116,6 +116,7 @@ static inline void io_wq_put_hash(struct io_wq_hash *hash)
 struct io_wq_data {
 	struct io_wq_hash *hash;
 	struct task_struct *task;
+	io_wq_work_fn *do_work;
 	free_work_fn *free_work;
 };
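The new do_work field gives each io-wq a single work handler alongside its existing free_work callback, instead of each work item carrying its own function pointer. A minimal sketch of how an io-wq creator might fill this in; the handler names are illustrative, not taken from this diff:

static void my_do_work(struct io_wq_work *work)
{
	/* execute one queued work item */
}

static struct io_wq_work *my_free_work(struct io_wq_work *work)
{
	/* release the item; may hand back a linked work to run next */
	return NULL;
}

struct io_wq_data data = {
	.hash		= hash,		/* shared async hash state */
	.task		= task,		/* task the workers act on behalf of */
	.do_work	= my_do_work,	/* new: one handler per wq, not per work */
	.free_work	= my_free_work,
};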
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -7,19 +7,17 @@
 #if defined(CONFIG_IO_URING)
 struct sock *io_uring_get_socket(struct file *file);
-void __io_uring_task_cancel(void);
-void __io_uring_files_cancel(struct files_struct *files);
+void __io_uring_cancel(struct files_struct *files);
 void __io_uring_free(struct task_struct *tsk);
-static inline void io_uring_task_cancel(void)
+static inline void io_uring_files_cancel(struct files_struct *files)
 {
 	if (current->io_uring)
-		__io_uring_task_cancel();
+		__io_uring_cancel(files);
 }
-static inline void io_uring_files_cancel(struct files_struct *files)
+static inline void io_uring_task_cancel(void)
 {
-	if (current->io_uring)
-		__io_uring_files_cancel(files);
+	return io_uring_files_cancel(NULL);
 }
 static inline void io_uring_free(struct task_struct *tsk)
 {
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -22,6 +22,8 @@ enum task_work_notify_mode {
 int task_work_add(struct task_struct *task, struct callback_head *twork,
 			enum task_work_notify_mode mode);
+struct callback_head *task_work_cancel_match(struct task_struct *task,
+	bool (*match)(struct callback_head *, void *data), void *data);
 struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t);
 void task_work_run(void);
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -290,29 +290,32 @@ TRACE_EVENT(io_uring_fail_link,
  * @ctx:		pointer to a ring context structure
  * @user_data:		user data associated with the request
  * @res:		result of the request
+ * @cflags:		completion flags
  *
  */
 TRACE_EVENT(io_uring_complete,

-	TP_PROTO(void *ctx, u64 user_data, long res),
+	TP_PROTO(void *ctx, u64 user_data, long res, unsigned cflags),

-	TP_ARGS(ctx, user_data, res),
+	TP_ARGS(ctx, user_data, res, cflags),

 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
 		__field(  u64,		user_data	)
 		__field(  long,		res		)
+		__field(  unsigned,	cflags		)
 	),

 	TP_fast_assign(
 		__entry->ctx		= ctx;
 		__entry->user_data	= user_data;
 		__entry->res		= res;
+		__entry->cflags		= cflags;
 	),

-	TP_printk("ring %p, user_data 0x%llx, result %ld",
+	TP_printk("ring %p, user_data 0x%llx, result %ld, cflags %x",
 		  __entry->ctx, (unsigned long long)__entry->user_data,
-		  __entry->res)
+		  __entry->res, __entry->cflags)
 );
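Since the tracepoint prototype grew a cflags argument, completion paths can now trace the CQE flags together with the result. A hypothetical call site matching the TP_PROTO above; the surrounding variable names are assumed:

	/* post a CQE and trace result and completion flags together */
	trace_io_uring_complete(ctx, req->user_data, res, cflags);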
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -159,6 +159,21 @@ enum {
  */
 #define SPLICE_F_FD_IN_FIXED	(1U << 31) /* the last bit of __u32 */
+/*
+ * POLL_ADD flags. Note that since sqe->poll_events is the flag space, the
+ * command flags for POLL_ADD are stored in sqe->len.
+ *
+ * IORING_POLL_ADD_MULTI	Multishot poll. Sets IORING_CQE_F_MORE if
+ *				the poll handler will continue to report
+ *				CQEs on behalf of the same SQE.
+ *
+ * IORING_POLL_UPDATE		Update existing poll request, matching
+ *				sqe->addr as the old user_data field.
+ */
+#define IORING_POLL_ADD_MULTI	(1U << 0)
+#define IORING_POLL_UPDATE_EVENTS	(1U << 1)
+#define IORING_POLL_UPDATE_USER_DATA	(1U << 2)
 /*
  * IO completion data structure (Completion Queue Entry)
  */
@@ -172,8 +187,10 @@ struct io_uring_cqe {
  * cqe->flags
  *
  * IORING_CQE_F_BUFFER	If set, the upper 16 bits are the buffer ID
+ * IORING_CQE_F_MORE	If set, parent SQE will generate more CQE entries
  */
 #define IORING_CQE_F_BUFFER		(1U << 0)
+#define IORING_CQE_F_MORE		(1U << 1)
 enum {
 	IORING_CQE_BUFFER_SHIFT		= 16,
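Together with IORING_CQE_F_MORE above, a multishot poll is armed once and then produces one CQE per readiness event until the kernel stops setting the flag. A hedged userspace sketch using liburing, assuming a liburing version that ships io_uring_prep_poll_multishot(); error handling is trimmed:

#include <liburing.h>
#include <poll.h>
#include <stdio.h>

/* Arm one multishot poll on fd; drain CQEs until IORING_CQE_F_MORE
 * is no longer set, i.e. the kernel has stopped the multishot poll. */
static int watch_fd(struct io_uring *ring, int fd)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int more;

	io_uring_prep_poll_multishot(sqe, fd, POLLIN);
	io_uring_submit(ring);

	do {
		if (io_uring_wait_cqe(ring, &cqe))
			return -1;
		printf("poll events 0x%x\n", cqe->res);
		more = cqe->flags & IORING_CQE_F_MORE;
		io_uring_cqe_seen(ring, cqe);
	} while (more);

	return 0;
}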
@@ -281,6 +298,8 @@ enum {
 	IORING_UNREGISTER_PERSONALITY		= 10,
 	IORING_REGISTER_RESTRICTIONS		= 11,
 	IORING_REGISTER_ENABLE_RINGS		= 12,
+	IORING_REGISTER_RSRC			= 13,
+	IORING_REGISTER_RSRC_UPDATE		= 14,
 	/* this goes last */
 	IORING_REGISTER_LAST
@@ -293,12 +312,33 @@ struct io_uring_files_update {
 	__aligned_u64 /* __s32 * */ fds;
 };
+enum {
+	IORING_RSRC_FILE		= 0,
+	IORING_RSRC_BUFFER		= 1,
+};
+
+struct io_uring_rsrc_register {
+	__u32 type;
+	__u32 nr;
+	__aligned_u64 data;
+	__aligned_u64 tags;
+};
+
 struct io_uring_rsrc_update {
 	__u32 offset;
 	__u32 resv;
 	__aligned_u64 data;
 };
+struct io_uring_rsrc_update2 {
+	__u32 offset;
+	__u32 resv;
+	__aligned_u64 data;
+	__aligned_u64 tags;
+	__u32 type;
+	__u32 nr;
+};
+
 /* Skip updating fd indexes set to this value in the fd table */
 #define IORING_REGISTER_FILES_SKIP	(-2)
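With IORING_REGISTER_RSRC, buffers and files share one registration entry point, and each resource can carry a tag that is posted as a CQE when the resource is released. A hedged sketch of registering tagged buffers through the raw syscall; the wrapper function is assumed, and since this interface is brand new in this merge, later kernels may change it:

#include <linux/io_uring.h>
#include <sys/syscall.h>
#include <sys/uio.h>
#include <unistd.h>

/* Register nr iovecs as tagged fixed buffers via IORING_REGISTER_RSRC. */
static int register_buffers(int ring_fd, struct iovec *iovs,
			    __u64 *tags, unsigned nr)
{
	struct io_uring_rsrc_register rr = {
		.type	= IORING_RSRC_BUFFER,
		.nr	= nr,
		.data	= (__u64)(unsigned long)iovs,
		.tags	= (__u64)(unsigned long)tags,
	};

	/* the nr_args argument carries the struct size, which keeps
	 * the ABI extendable (see the resv fields above) */
	return syscall(__NR_io_uring_register, ring_fd, IORING_REGISTER_RSRC,
		       &rr, sizeof(rr));
}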
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -927,6 +927,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	tsk->splice_pipe = NULL;
 	tsk->task_frag.page = NULL;
 	tsk->wake_q.next = NULL;
+	tsk->pf_io_worker = NULL;
 	account_kernel_stack(tsk, 1);
@@ -1941,7 +1942,7 @@ static __latent_entropy struct task_struct *copy_process(
 	recalc_sigpending();
 	spin_unlock_irq(&current->sighand->siglock);
 	retval = -ERESTARTNOINTR;
-	if (signal_pending(current))
+	if (task_sigpending(current))
 		goto fork_out;
 	retval = -ENOMEM;
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -59,18 +59,17 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
 }
 /**
- * task_work_cancel - cancel a pending work added by task_work_add()
+ * task_work_cancel_match - cancel a pending work added by task_work_add()
  * @task: the task which should execute the work
- * @func: identifies the work to remove
- *
- * Find the last queued pending work with ->func == @func and remove
- * it from queue.
+ * @match: match function to call
  *
  * RETURNS:
  * The found work or NULL if not found.
  */
 struct callback_head *
-task_work_cancel(struct task_struct *task, task_work_func_t func)
+task_work_cancel_match(struct task_struct *task,
+		       bool (*match)(struct callback_head *, void *data),
+		       void *data)
 {
 	struct callback_head **pprev = &task->task_works;
 	struct callback_head *work;
@@ -86,7 +85,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func)
 	 */
 	raw_spin_lock_irqsave(&task->pi_lock, flags);
 	while ((work = READ_ONCE(*pprev))) {
-		if (work->func != func)
+		if (!match(work, data))
 			pprev = &work->next;
 		else if (cmpxchg(pprev, work, work->next) == work)
 			break;
@@ -96,6 +95,28 @@ task_work_cancel(struct task_struct *task, task_work_func_t func)
 	return work;
 }
+static bool task_work_func_match(struct callback_head *cb, void *data)
+{
+	return cb->func == data;
+}
+
+/**
+ * task_work_cancel - cancel a pending work added by task_work_add()
+ * @task: the task which should execute the work
+ * @func: identifies the work to remove
+ *
+ * Find the last queued pending work with ->func == @func and remove
+ * it from queue.
+ *
+ * RETURNS:
+ * The found work or NULL if not found.
+ */
+struct callback_head *
+task_work_cancel(struct task_struct *task, task_work_func_t func)
+{
+	return task_work_cancel_match(task, task_work_func_match, func);
+}
 /**
  * task_work_run - execute the works added by task_work_add()
  *
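Because cancellation now matches on an arbitrary predicate instead of only the function pointer, a caller can cancel a specific queued instance by key. A hypothetical in-kernel user of the new helper; the types and names here are invented for illustration:

struct my_work {
	struct callback_head cb;
	void *key;
};

static bool my_work_match(struct callback_head *cb, void *data)
{
	struct my_work *w = container_of(cb, struct my_work, cb);

	return w->key == data;	/* match on a per-work key, not ->func */
}

/* Remove the last queued my_work with the given key from @task, if any. */
static struct callback_head *my_work_cancel(struct task_struct *task, void *key)
{
	return task_work_cancel_match(task, my_work_match, key);
}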