Commit 4c72e2b8 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-6.8/io_uring-2024-01-08' of git://git.kernel.dk/linux

Pull io_uring updates from Jens Axboe:
 "Mostly just come fixes and cleanups, but one feature as well. In
  detail:

   - Harden the check for handling IOPOLL based on return (Pavel)

   - Various minor optimizations (Pavel)

   - Drop remnants of SCM_RIGHTS fd passing support, now that it's no
     longer supported since 6.7 (me)

   - Fix for a case where bytes_done wasn't initialized properly on a
     failure condition for read/write requests (me)

   - Move the register related code to a separate file (me)

   - Add support for returning the provided ring buffer head (me)

   - Add support for adding a direct descriptor to the normal file table
     (me, Christian Brauner)

   - Fix for ensuring pending task_work for a ring with DEFER_TASKRUN is
     run even if we timeout waiting (me)"

* tag 'for-6.8/io_uring-2024-01-08' of git://git.kernel.dk/linux:
  io_uring: ensure local task_work is run on wait timeout
  io_uring/kbuf: add method for returning provided buffer ring head
  io_uring/rw: ensure io->bytes_done is always initialized
  io_uring: drop any code related to SCM_RIGHTS
  io_uring/unix: drop usage of io_uring socket
  io_uring/register: move io_uring_register(2) related code to register.c
  io_uring/openclose: add support for IORING_OP_FIXED_FD_INSTALL
  io_uring/cmd: inline io_uring_cmd_get_task
  io_uring/cmd: inline io_uring_cmd_do_in_task_lazy
  io_uring: split out cmd api into a separate header
  io_uring: optimise ltimeout for inline execution
  io_uring: don't check iopoll if request completes
parents 01d550f0 6ff1407e
......@@ -11142,6 +11142,7 @@ L: io-uring@vger.kernel.org
S: Maintained
T: git git://git.kernel.dk/linux-block
T: git git://git.kernel.dk/liburing
F: include/linux/io_uring/
F: include/linux/io_uring.h
F: include/linux/io_uring_types.h
F: include/trace/events/io_uring.h
......
......@@ -36,7 +36,7 @@
#include <linux/sched/mm.h>
#include <linux/uaccess.h>
#include <linux/cdev.h>
#include <linux/io_uring.h>
#include <linux/io_uring/cmd.h>
#include <linux/blk-mq.h>
#include <linux/delay.h>
#include <linux/mm.h>
......
......@@ -5,7 +5,7 @@
*/
#include <linux/ptrace.h> /* for force_successful_syscall_return */
#include <linux/nvme_ioctl.h>
#include <linux/io_uring.h>
#include <linux/io_uring/cmd.h>
#include "nvme.h"
enum {
......
......@@ -6,66 +6,13 @@
#include <linux/xarray.h>
#include <uapi/linux/io_uring.h>
enum io_uring_cmd_flags {
IO_URING_F_COMPLETE_DEFER = 1,
IO_URING_F_UNLOCKED = 2,
/* the request is executed from poll, it should not be freed */
IO_URING_F_MULTISHOT = 4,
/* executed by io-wq */
IO_URING_F_IOWQ = 8,
/* int's last bit, sign checks are usually faster than a bit test */
IO_URING_F_NONBLOCK = INT_MIN,
/* ctx state flags, for URING_CMD */
IO_URING_F_SQE128 = (1 << 8),
IO_URING_F_CQE32 = (1 << 9),
IO_URING_F_IOPOLL = (1 << 10),
/* set when uring wants to cancel a previously issued command */
IO_URING_F_CANCEL = (1 << 11),
IO_URING_F_COMPAT = (1 << 12),
};
/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */
#define IORING_URING_CMD_CANCELABLE (1U << 30)
struct io_uring_cmd {
struct file *file;
const struct io_uring_sqe *sqe;
/* callback to defer completions to task context */
void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
u32 cmd_op;
u32 flags;
u8 pdu[32]; /* available inline for free use */
};
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
{
return sqe->cmd;
}
#if defined(CONFIG_IO_URING)
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd);
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
unsigned issue_flags);
struct sock *io_uring_get_socket(struct file *file);
void __io_uring_cancel(bool cancel_all);
void __io_uring_free(struct task_struct *tsk);
void io_uring_unreg_ringfd(void);
const char *io_uring_get_opcode(u8 opcode);
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
unsigned flags);
/* users should follow semantics of IOU_F_TWQ_LAZY_WAKE */
void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned));
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
{
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
}
int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
bool io_is_uring_fops(struct file *file);
static inline void io_uring_files_cancel(void)
{
......@@ -84,32 +31,7 @@ static inline void io_uring_free(struct task_struct *tsk)
if (tsk->io_uring)
__io_uring_free(tsk);
}
int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
unsigned int issue_flags);
struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd);
#else
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd)
{
return -EOPNOTSUPP;
}
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
ssize_t ret2, unsigned issue_flags)
{
}
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
{
}
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
{
}
static inline struct sock *io_uring_get_socket(struct file *file)
{
return NULL;
}
static inline void io_uring_task_cancel(void)
{
}
......@@ -128,13 +50,9 @@ static inline int io_uring_cmd_sock(struct io_uring_cmd *cmd,
{
return -EOPNOTSUPP;
}
static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
}
static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
static inline bool io_is_uring_fops(struct file *file)
{
return NULL;
return false;
}
#endif
......
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _LINUX_IO_URING_CMD_H
#define _LINUX_IO_URING_CMD_H
#include <uapi/linux/io_uring.h>
#include <linux/io_uring_types.h>
/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */
#define IORING_URING_CMD_CANCELABLE (1U << 30)
struct io_uring_cmd {
struct file *file;
const struct io_uring_sqe *sqe;
/* callback to defer completions to task context */
void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned);
u32 cmd_op;
u32 flags;
u8 pdu[32]; /* available inline for free use */
};
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
{
return sqe->cmd;
}
#if defined(CONFIG_IO_URING)
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd);
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2,
unsigned issue_flags);
void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
unsigned flags);
void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
unsigned int issue_flags);
#else
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd)
{
return -EOPNOTSUPP;
}
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
ssize_t ret2, unsigned issue_flags)
{
}
static inline void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned),
unsigned flags)
{
}
static inline void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
}
#endif
/* users must follow the IOU_F_TWQ_LAZY_WAKE semantics */
static inline void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
{
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE);
}
static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
{
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, 0);
}
static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
{
return cmd_to_io_kiocb(cmd)->task;
}
#endif /* _LINUX_IO_URING_CMD_H */
......@@ -7,6 +7,37 @@
#include <linux/llist.h>
#include <uapi/linux/io_uring.h>
enum {
/*
* A hint to not wake right away but delay until there are enough of
* tw's queued to match the number of CQEs the task is waiting for.
*
* Must not be used wirh requests generating more than one CQE.
* It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
*/
IOU_F_TWQ_LAZY_WAKE = 1,
};
enum io_uring_cmd_flags {
IO_URING_F_COMPLETE_DEFER = 1,
IO_URING_F_UNLOCKED = 2,
/* the request is executed from poll, it should not be freed */
IO_URING_F_MULTISHOT = 4,
/* executed by io-wq */
IO_URING_F_IOWQ = 8,
/* int's last bit, sign checks are usually faster than a bit test */
IO_URING_F_NONBLOCK = INT_MIN,
/* ctx state flags, for URING_CMD */
IO_URING_F_SQE128 = (1 << 8),
IO_URING_F_CQE32 = (1 << 9),
IO_URING_F_IOPOLL = (1 << 10),
/* set when uring wants to cancel a previously issued command */
IO_URING_F_CANCEL = (1 << 11),
IO_URING_F_COMPAT = (1 << 12),
};
struct io_wq_work_node {
struct io_wq_work_node *next;
};
......@@ -358,9 +389,6 @@ struct io_ring_ctx {
struct wait_queue_head rsrc_quiesce_wq;
unsigned rsrc_quiesce;
#if defined(CONFIG_UNIX)
struct socket *ring_sock;
#endif
/* hashed buffered write serialization */
struct io_wq_hash *hash_map;
......
......@@ -71,6 +71,7 @@ struct io_uring_sqe {
__u32 uring_cmd_flags;
__u32 waitid_flags;
__u32 futex_flags;
__u32 install_fd_flags;
};
__u64 user_data; /* data to be passed back at completion time */
/* pack this to avoid bogus arm OABI complaints */
......@@ -253,6 +254,7 @@ enum io_uring_op {
IORING_OP_FUTEX_WAIT,
IORING_OP_FUTEX_WAKE,
IORING_OP_FUTEX_WAITV,
IORING_OP_FIXED_FD_INSTALL,
/* this goes last, obviously */
IORING_OP_LAST,
......@@ -386,6 +388,13 @@ enum {
/* Pass through the flags from sqe->file_index to cqe->flags */
#define IORING_MSG_RING_FLAGS_PASS (1U << 1)
/*
* IORING_OP_FIXED_FD_INSTALL flags (sqe->install_fd_flags)
*
* IORING_FIXED_FD_NO_CLOEXEC Don't mark the fd as O_CLOEXEC
*/
#define IORING_FIXED_FD_NO_CLOEXEC (1U << 0)
/*
* IO completion data structure (Completion Queue Entry)
*/
......@@ -558,6 +567,9 @@ enum {
/* register a range of fixed file slots for automatic slot allocation */
IORING_REGISTER_FILE_ALLOC_RANGE = 25,
/* return status information for a buffer group */
IORING_REGISTER_PBUF_STATUS = 26,
/* this goes last */
IORING_REGISTER_LAST,
......@@ -684,6 +696,13 @@ struct io_uring_buf_reg {
__u64 resv[3];
};
/* argument for IORING_REGISTER_PBUF_STATUS */
struct io_uring_buf_status {
__u32 buf_group; /* input */
__u32 head; /* output */
__u32 resv[8];
};
/*
* io_uring_restriction->opcode values
*/
......
......@@ -8,6 +8,6 @@ obj-$(CONFIG_IO_URING) += io_uring.o xattr.o nop.o fs.o splice.o \
statx.o net.o msg_ring.o timeout.o \
sqpoll.o fdinfo.o tctx.o poll.o \
cancel.o kbuf.o rsrc.o rw.o opdef.o \
notif.o waitid.o
notif.o waitid.o register.o
obj-$(CONFIG_IO_WQ) += io-wq.o
obj-$(CONFIG_FUTEX) += futex.o
......@@ -87,13 +87,10 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
io_file_bitmap_clear(&ctx->file_table, slot_index);
}
ret = io_scm_file_account(ctx, file);
if (!ret) {
*io_get_tag_slot(ctx->file_data, slot_index) = 0;
io_fixed_file_set(file_slot, file);
io_file_bitmap_set(&ctx->file_table, slot_index);
}
return ret;
*io_get_tag_slot(ctx->file_data, slot_index) = 0;
io_fixed_file_set(file_slot, file);
io_file_bitmap_set(&ctx->file_table, slot_index);
return 0;
}
int __io_fixed_fd_install(struct io_ring_ctx *ctx, struct file *file,
......
This diff is collapsed.
......@@ -15,16 +15,6 @@
#include <trace/events/io_uring.h>
#endif
enum {
/*
* A hint to not wake right away but delay until there are enough of
* tw's queued to match the number of CQEs the task is waiting for.
*
* Must not be used wirh requests generating more than one CQE.
* It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
*/
IOU_F_TWQ_LAZY_WAKE = 1,
};
enum {
IOU_OK = 0,
......@@ -54,7 +44,6 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
unsigned issue_flags);
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
bool io_is_uring_fops(struct file *file);
bool io_alloc_async_data(struct io_kiocb *req);
void io_req_task_queue(struct io_kiocb *req);
void io_queue_iowq(struct io_kiocb *req, struct io_tw_state *ts_dont_use);
......@@ -89,6 +78,14 @@ bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
void *io_mem_alloc(size_t size);
void io_mem_free(void *ptr);
enum {
IO_EVENTFD_OP_SIGNAL_BIT,
IO_EVENTFD_OP_FREE_BIT,
};
void io_eventfd_ops(struct rcu_head *rcu);
void io_activate_pollwq(struct io_ring_ctx *ctx);
#if defined(CONFIG_PROVE_LOCKING)
static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
{
......
......@@ -750,6 +750,32 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
return 0;
}
int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg)
{
struct io_uring_buf_status buf_status;
struct io_buffer_list *bl;
int i;
if (copy_from_user(&buf_status, arg, sizeof(buf_status)))
return -EFAULT;
for (i = 0; i < ARRAY_SIZE(buf_status.resv); i++)
if (buf_status.resv[i])
return -EINVAL;
bl = io_buffer_get_list(ctx, buf_status.buf_group);
if (!bl)
return -ENOENT;
if (!bl->is_mapped)
return -EINVAL;
buf_status.head = bl->head;
if (copy_to_user(arg, &buf_status, sizeof(buf_status)))
return -EFAULT;
return 0;
}
void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
{
struct io_buffer_list *bl;
......
......@@ -53,6 +53,7 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags);
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg);
void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx);
......
......@@ -469,6 +469,12 @@ const struct io_issue_def io_issue_defs[] = {
.prep = io_eopnotsupp_prep,
#endif
},
[IORING_OP_FIXED_FD_INSTALL] = {
.needs_file = 1,
.audit_skip = 1,
.prep = io_install_fixed_fd_prep,
.issue = io_install_fixed_fd,
},
};
const struct io_cold_def io_cold_defs[] = {
......@@ -704,6 +710,9 @@ const struct io_cold_def io_cold_defs[] = {
[IORING_OP_FUTEX_WAITV] = {
.name = "FUTEX_WAITV",
},
[IORING_OP_FIXED_FD_INSTALL] = {
.name = "FIXED_FD_INSTALL",
},
};
const char *io_uring_get_opcode(u8 opcode)
......
......@@ -31,6 +31,11 @@ struct io_close {
u32 file_slot;
};
struct io_fixed_install {
struct file *file;
unsigned int o_flags;
};
static bool io_openat_force_async(struct io_open *open)
{
/*
......@@ -254,3 +259,42 @@ int io_close(struct io_kiocb *req, unsigned int issue_flags)
io_req_set_res(req, ret, 0);
return IOU_OK;
}
int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_fixed_install *ifi;
unsigned int flags;
if (sqe->off || sqe->addr || sqe->len || sqe->buf_index ||
sqe->splice_fd_in || sqe->addr3)
return -EINVAL;
/* must be a fixed file */
if (!(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
flags = READ_ONCE(sqe->install_fd_flags);
if (flags & ~IORING_FIXED_FD_NO_CLOEXEC)
return -EINVAL;
/* default to O_CLOEXEC, disable if IORING_FIXED_FD_NO_CLOEXEC is set */
ifi = io_kiocb_to_cmd(req, struct io_fixed_install);
ifi->o_flags = O_CLOEXEC;
if (flags & IORING_FIXED_FD_NO_CLOEXEC)
ifi->o_flags = 0;
return 0;
}
int io_install_fixed_fd(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_fixed_install *ifi;
int ret;
ifi = io_kiocb_to_cmd(req, struct io_fixed_install);
ret = receive_fd(req->file, NULL, ifi->o_flags);
if (ret < 0)
req_set_fail(req);
io_req_set_res(req, ret, 0);
return IOU_OK;
}
......@@ -12,3 +12,6 @@ int io_openat2(struct io_kiocb *req, unsigned int issue_flags);
int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_close(struct io_kiocb *req, unsigned int issue_flags);
int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_install_fixed_fd(struct io_kiocb *req, unsigned int issue_flags);
This diff is collapsed.
// SPDX-License-Identifier: GPL-2.0
#ifndef IORING_REGISTER_H
#define IORING_REGISTER_H
int io_eventfd_unregister(struct io_ring_ctx *ctx);
int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id);
#endif
......@@ -24,7 +24,6 @@ struct io_rsrc_update {
};
static void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
struct io_mapped_ubuf **pimu,
struct page **last_hpage);
......@@ -157,7 +156,7 @@ static void io_rsrc_put_work(struct io_rsrc_node *node)
switch (node->type) {
case IORING_RSRC_FILE:
io_rsrc_file_put(node->ctx, prsrc);
fput(prsrc->file);
break;
case IORING_RSRC_BUFFER:
io_rsrc_buf_put(node->ctx, prsrc);
......@@ -402,23 +401,13 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
break;
}
/*
* Don't allow io_uring instances to be registered. If
* UNIX isn't enabled, then this causes a reference
* cycle and this instance can never get freed. If UNIX
* is enabled we'll handle it just fine, but there's
* still no point in allowing a ring fd as it doesn't
* support regular read/write anyway.
* Don't allow io_uring instances to be registered.
*/
if (io_is_uring_fops(file)) {
fput(file);
err = -EBADF;
break;
}
err = io_scm_file_account(ctx, file);
if (err) {
fput(file);
break;
}
*io_get_tag_slot(data, i) = tag;
io_fixed_file_set(file_slot, file);
io_file_bitmap_set(&ctx->file_table, i);
......@@ -675,22 +664,12 @@ void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
for (i = 0; i < ctx->nr_user_files; i++) {
struct file *file = io_file_from_index(&ctx->file_table, i);
/* skip scm accounted files, they'll be freed by ->ring_sock */
if (!file || io_file_need_scm(file))
if (!file)
continue;
io_file_bitmap_clear(&ctx->file_table, i);
fput(file);
}
#if defined(CONFIG_UNIX)
if (ctx->ring_sock) {
struct sock *sock = ctx->ring_sock->sk;
struct sk_buff *skb;
while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL)
kfree_skb(skb);
}
#endif
io_free_file_tables(&ctx->file_table);
io_file_table_set_alloc_range(ctx, 0, 0);
io_rsrc_data_free(ctx->file_data);
......@@ -718,137 +697,6 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx)
return ret;
}
/*
* Ensure the UNIX gc is aware of our file set, so we are certain that
* the io_uring can be safely unregistered on process exit, even if we have
* loops in the file referencing. We account only files that can hold other
* files because otherwise they can't form a loop and so are not interesting
* for GC.
*/
int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file)
{
#if defined(CONFIG_UNIX)
struct sock *sk = ctx->ring_sock->sk;
struct sk_buff_head *head = &sk->sk_receive_queue;
struct scm_fp_list *fpl;
struct sk_buff *skb;
if (likely(!io_file_need_scm(file)))
return 0;
/*
* See if we can merge this file into an existing skb SCM_RIGHTS
* file set. If there's no room, fall back to allocating a new skb
* and filling it in.
*/
spin_lock_irq(&head->lock);
skb = skb_peek(head);
if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD)
__skb_unlink(skb, head);
else
skb = NULL;
spin_unlock_irq(&head->lock);
if (!skb) {
fpl = kzalloc(sizeof(*fpl), GFP_KERNEL);
if (!fpl)
return -ENOMEM;
skb = alloc_skb(0, GFP_KERNEL);
if (!skb) {
kfree(fpl);
return -ENOMEM;
}
fpl->user = get_uid(current_user());
fpl->max = SCM_MAX_FD;
fpl->count = 0;
UNIXCB(skb).fp = fpl;
skb->sk = sk;
skb->destructor = io_uring_destruct_scm;
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
}
fpl = UNIXCB(skb).fp;
fpl->fp[fpl->count++] = get_file(file);
unix_inflight(fpl->user, file);
skb_queue_head(head, skb);
fput(file);
#endif
return 0;
}
static __cold void io_rsrc_file_scm_put(struct io_ring_ctx *ctx, struct file *file)
{
#if defined(CONFIG_UNIX)
struct sock *sock = ctx->ring_sock->sk;
struct sk_buff_head list, *head = &sock->sk_receive_queue;
struct sk_buff *skb;
int i;
__skb_queue_head_init(&list);
/*
* Find the skb that holds this file in its SCM_RIGHTS. When found,
* remove this entry and rearrange the file array.
*/
skb = skb_dequeue(head);
while (skb) {
struct scm_fp_list *fp;
fp = UNIXCB(skb).fp;
for (i = 0; i < fp->count; i++) {
int left;
if (fp->fp[i] != file)
continue;
unix_notinflight(fp->user, fp->fp[i]);
left = fp->count - 1 - i;
if (left) {
memmove(&fp->fp[i], &fp->fp[i + 1],
left * sizeof(struct file *));
}
fp->count--;
if (!fp->count) {
kfree_skb(skb);
skb = NULL;
} else {
__skb_queue_tail(&list, skb);
}
fput(file);
file = NULL;
break;
}
if (!file)
break;
__skb_queue_tail(&list, skb);
skb = skb_dequeue(head);
}
if (skb_peek(&list)) {
spin_lock_irq(&head->lock);
while ((skb = __skb_dequeue(&list)) != NULL)
__skb_queue_tail(head, skb);
spin_unlock_irq(&head->lock);
}
#endif
}
static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
{
struct file *file = prsrc->file;
if (likely(!io_file_need_scm(file)))
fput(file);
else
io_rsrc_file_scm_put(ctx, file);
}
int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_args, u64 __user *tags)
{
......@@ -897,21 +745,12 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
goto fail;
/*
* Don't allow io_uring instances to be registered. If UNIX
* isn't enabled, then this causes a reference cycle and this
* instance can never get freed. If UNIX is enabled we'll
* handle it just fine, but there's still no point in allowing
* a ring fd as it doesn't support regular read/write anyway.
* Don't allow io_uring instances to be registered.
*/
if (io_is_uring_fops(file)) {
fput(file);
goto fail;
}
ret = io_scm_file_account(ctx, file);
if (ret) {
fput(file);
goto fail;
}
file_slot = io_fixed_file_slot(&ctx->file_table, i);
io_fixed_file_set(file_slot, file);
io_file_bitmap_set(&ctx->file_table, i);
......
......@@ -75,21 +75,6 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx);
int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_args, u64 __user *tags);
int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file);
static inline bool io_file_need_scm(struct file *filp)
{
return false;
}
static inline int io_scm_file_account(struct io_ring_ctx *ctx,
struct file *file)
{
if (likely(!io_file_need_scm(file)))
return 0;
return __io_scm_file_account(ctx, file);
}
int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg,
unsigned nr_args);
int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
......
......@@ -10,7 +10,7 @@
#include <linux/poll.h>
#include <linux/nospec.h>
#include <linux/compat.h>
#include <linux/io_uring.h>
#include <linux/io_uring/cmd.h>
#include <uapi/linux/io_uring.h>
......@@ -589,15 +589,19 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
struct iovec *iov;
int ret;
iorw->bytes_done = 0;
iorw->free_iovec = NULL;
/* submission path, ->uring_lock should already be taken */
ret = io_import_iovec(rw, req, &iov, &iorw->s, 0);
if (unlikely(ret < 0))
return ret;
iorw->bytes_done = 0;
iorw->free_iovec = iov;
if (iov)
if (iov) {
iorw->free_iovec = iov;
req->flags |= REQ_F_NEED_CLEANUP;
}
return 0;
}
......
......@@ -2,7 +2,7 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/io_uring.h>
#include <linux/io_uring/cmd.h>
#include <linux/security.h>
#include <linux/nospec.h>
......@@ -52,12 +52,6 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
}
EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
{
return cmd_to_io_kiocb(cmd)->task;
}
EXPORT_SYMBOL_GPL(io_uring_cmd_get_task);
static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
......@@ -78,13 +72,6 @@ void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
}
EXPORT_SYMBOL_GPL(__io_uring_cmd_do_in_task);
void io_uring_cmd_do_in_task_lazy(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *, unsigned))
{
__io_uring_cmd_do_in_task(ioucmd, task_work_cb, IOU_F_TWQ_LAZY_WAKE);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_do_in_task_lazy);
static inline void io_req_set_cqe32_extra(struct io_kiocb *req,
u64 extra1, u64 extra2)
{
......
......@@ -105,7 +105,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
if (fd < 0 || !(file = fget_raw(fd)))
return -EBADF;
/* don't allow io_uring files */
if (io_uring_get_socket(file)) {
if (io_is_uring_fops(file)) {
fput(file);
return -EINVAL;
}
......
......@@ -35,10 +35,8 @@ struct sock *unix_get_socket(struct file *filp)
/* PF_UNIX ? */
if (s && ops && ops->family == PF_UNIX)
u_sock = s;
} else {
/* Could be an io_uring instance */
u_sock = io_uring_get_socket(filp);
}
return u_sock;
}
EXPORT_SYMBOL(unix_get_socket);
......
......@@ -92,7 +92,7 @@
#include <uapi/linux/mount.h>
#include <linux/fsnotify.h>
#include <linux/fanotify.h>
#include <linux/io_uring.h>
#include <linux/io_uring/cmd.h>
#include <uapi/linux/lsm.h>
#include "avc.h"
......
......@@ -43,7 +43,7 @@
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/watch_queue.h>
#include <linux/io_uring.h>
#include <linux/io_uring/cmd.h>
#include <uapi/linux/lsm.h>
#include "smack.h"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment