Commit b5ba65df authored by Jens Axboe

Merge branch 'for-5.19/io_uring-socket' into for-5.19/io_uring-passthrough

* for-5.19/io_uring-socket:
  io_uring: use the text representation of ops in trace
  io_uring: rename op -> opcode
  io_uring: add io_uring_get_opcode
  io_uring: add type to op enum
  io_uring: add socket(2) support
  net: add __sys_socket_file()
  io_uring: fix trace for reduced sqe padding
  io_uring: add fgetxattr and getxattr support
  io_uring: add fsetxattr and setxattr support
  fs: split off do_getxattr from getxattr
  fs: split off setxattr_copy and do_setxattr function from setxattr
parents 13086899 033b87d2
...@@ -191,3 +191,32 @@ long splice_file_to_pipe(struct file *in, ...@@ -191,3 +191,32 @@ long splice_file_to_pipe(struct file *in,
struct pipe_inode_info *opipe, struct pipe_inode_info *opipe,
loff_t *offset, loff_t *offset,
size_t len, unsigned int flags); size_t len, unsigned int flags);
/*
* fs/xattr.c:
*/
/*
 * Kernel-side buffer for an extended attribute name: XATTR_NAME_MAX bytes
 * plus one extra byte (presumably for the terminating NUL - confirm).
 */
struct xattr_name {
char name[XATTR_NAME_MAX + 1];
};
/*
 * Argument bundle passed to the xattr helpers do_getxattr(), setxattr_copy()
 * and do_setxattr(), so that the user-copy step and the VFS call can be
 * performed separately.
 */
struct xattr_ctx {
/* Value of attribute */
/*
 * User-space value pointer. cvalue is the const view read by the set path,
 * value is the writable view written by the get path.
 */
union {
const void __user *cvalue;
void __user *value;
};
/*
 * Kernel copy of the value. It is allocated by the helpers; the callers
 * visible in this change release it with kvfree().
 */
void *kvalue;
/* Size of the value buffer in bytes */
size_t size;
/* Attribute name */
struct xattr_name *kname;
/* Set-operation flags; setxattr_copy() rejects anything but XATTR_CREATE/XATTR_REPLACE */
unsigned int flags;
};
/*
 * Fetch the attribute named by ctx->kname from dentry @d. The kernel buffer
 * is stored in ctx->kvalue and is NOT freed here; the caller must kvfree() it.
 * Returns the vfs_getxattr() result or a negative errno.
 */
ssize_t do_getxattr(struct user_namespace *mnt_userns,
struct dentry *d,
struct xattr_ctx *ctx);
/*
 * Validate ctx->flags and copy the user-supplied name (into ctx->kname) and
 * value (into a newly allocated ctx->kvalue). Returns 0 or a negative errno.
 */
int setxattr_copy(const char __user *name, struct xattr_ctx *ctx);
/*
 * Apply an attribute previously prepared with setxattr_copy() to @dentry via
 * vfs_setxattr(). ctx->kvalue remains owned by the caller.
 */
int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
struct xattr_ctx *ctx);
...@@ -80,6 +80,7 @@ ...@@ -80,6 +80,7 @@
#include <linux/io_uring.h> #include <linux/io_uring.h>
#include <linux/audit.h> #include <linux/audit.h>
#include <linux/security.h> #include <linux/security.h>
#include <linux/xattr.h>
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h> #include <trace/events/io_uring.h>
...@@ -578,6 +579,16 @@ struct io_accept { ...@@ -578,6 +579,16 @@ struct io_accept {
unsigned long nofile; unsigned long nofile;
}; };
/* Per-request state for IORING_OP_SOCKET; filled in by io_socket_prep(). */
struct io_socket {
struct file *file;
/* socket arguments, read from sqe->fd, sqe->off and sqe->len respectively */
int domain;
int type;
int protocol;
/* the bits of type outside SOCK_TYPE_MASK (only SOCK_CLOEXEC/SOCK_NONBLOCK are accepted) */
int flags;
/* from sqe->file_index: 0 installs a regular fd, otherwise fixed-file slot number + 1 */
u32 file_slot;
/* RLIMIT_NOFILE sampled at prep time; passed to __get_unused_fd_flags() at issue time */
unsigned long nofile;
};
struct io_sync { struct io_sync {
struct file *file; struct file *file;
loff_t len; loff_t len;
...@@ -782,6 +793,12 @@ struct io_async_rw { ...@@ -782,6 +793,12 @@ struct io_async_rw {
struct wait_page_queue wpq; struct wait_page_queue wpq;
}; };
/*
 * Per-request state shared by IORING_OP_FSETXATTR, IORING_OP_SETXATTR,
 * IORING_OP_FGETXATTR and IORING_OP_GETXATTR.
 */
struct io_xattr {
struct file *file;
/* name, value and flags handed to do_getxattr()/do_setxattr() */
struct xattr_ctx ctx;
/* target path for the path-based opcodes; left NULL by the fd-based ones */
struct filename *filename;
};
enum { enum {
REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT,
REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT, REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT,
...@@ -946,6 +963,8 @@ struct io_kiocb { ...@@ -946,6 +963,8 @@ struct io_kiocb {
struct io_symlink symlink; struct io_symlink symlink;
struct io_hardlink hardlink; struct io_hardlink hardlink;
struct io_msg msg; struct io_msg msg;
struct io_xattr xattr;
struct io_socket sock;
}; };
u8 opcode; u8 opcode;
...@@ -1246,6 +1265,17 @@ static const struct io_op_def io_op_defs[] = { ...@@ -1246,6 +1265,17 @@ static const struct io_op_def io_op_defs[] = {
.needs_file = 1, .needs_file = 1,
.iopoll = 1, .iopoll = 1,
}, },
[IORING_OP_FSETXATTR] = {
.needs_file = 1
},
[IORING_OP_SETXATTR] = {},
[IORING_OP_FGETXATTR] = {
.needs_file = 1
},
[IORING_OP_GETXATTR] = {},
[IORING_OP_SOCKET] = {
.audit_skip = 1,
},
}; };
/* requests with any of those set should undergo io_disarm_next() */ /* requests with any of those set should undergo io_disarm_next() */
...@@ -1290,6 +1320,107 @@ static struct kmem_cache *req_cachep; ...@@ -1290,6 +1320,107 @@ static struct kmem_cache *req_cachep;
static const struct file_operations io_uring_fops; static const struct file_operations io_uring_fops;
/*
 * io_uring_get_opcode - map an io_uring opcode number to its text name.
 * @opcode: raw opcode value as carried in a request.
 *
 * Returns the opcode name without the IORING_OP_ prefix, or "INVALID" for
 * IORING_OP_LAST and for any value that has no name. The returned string is
 * a constant and must not be freed.
 */
const char *io_uring_get_opcode(u8 opcode)
{
	/* Indexed by opcode; slots without an initializer stay NULL. */
	static const char * const opcode_names[IORING_OP_LAST] = {
		[IORING_OP_NOP]			= "NOP",
		[IORING_OP_READV]		= "READV",
		[IORING_OP_WRITEV]		= "WRITEV",
		[IORING_OP_FSYNC]		= "FSYNC",
		[IORING_OP_READ_FIXED]		= "READ_FIXED",
		[IORING_OP_WRITE_FIXED]		= "WRITE_FIXED",
		[IORING_OP_POLL_ADD]		= "POLL_ADD",
		[IORING_OP_POLL_REMOVE]		= "POLL_REMOVE",
		[IORING_OP_SYNC_FILE_RANGE]	= "SYNC_FILE_RANGE",
		[IORING_OP_SENDMSG]		= "SENDMSG",
		[IORING_OP_RECVMSG]		= "RECVMSG",
		[IORING_OP_TIMEOUT]		= "TIMEOUT",
		[IORING_OP_TIMEOUT_REMOVE]	= "TIMEOUT_REMOVE",
		[IORING_OP_ACCEPT]		= "ACCEPT",
		[IORING_OP_ASYNC_CANCEL]	= "ASYNC_CANCEL",
		[IORING_OP_LINK_TIMEOUT]	= "LINK_TIMEOUT",
		[IORING_OP_CONNECT]		= "CONNECT",
		[IORING_OP_FALLOCATE]		= "FALLOCATE",
		[IORING_OP_OPENAT]		= "OPENAT",
		[IORING_OP_CLOSE]		= "CLOSE",
		[IORING_OP_FILES_UPDATE]	= "FILES_UPDATE",
		[IORING_OP_STATX]		= "STATX",
		[IORING_OP_READ]		= "READ",
		[IORING_OP_WRITE]		= "WRITE",
		[IORING_OP_FADVISE]		= "FADVISE",
		[IORING_OP_MADVISE]		= "MADVISE",
		[IORING_OP_SEND]		= "SEND",
		[IORING_OP_RECV]		= "RECV",
		[IORING_OP_OPENAT2]		= "OPENAT2",
		[IORING_OP_EPOLL_CTL]		= "EPOLL_CTL",
		[IORING_OP_SPLICE]		= "SPLICE",
		[IORING_OP_PROVIDE_BUFFERS]	= "PROVIDE_BUFFERS",
		[IORING_OP_REMOVE_BUFFERS]	= "REMOVE_BUFFERS",
		[IORING_OP_TEE]			= "TEE",
		[IORING_OP_SHUTDOWN]		= "SHUTDOWN",
		[IORING_OP_RENAMEAT]		= "RENAMEAT",
		[IORING_OP_UNLINKAT]		= "UNLINKAT",
		[IORING_OP_MKDIRAT]		= "MKDIRAT",
		[IORING_OP_SYMLINKAT]		= "SYMLINKAT",
		[IORING_OP_LINKAT]		= "LINKAT",
		[IORING_OP_MSG_RING]		= "MSG_RING",
		[IORING_OP_FSETXATTR]		= "FSETXATTR",
		[IORING_OP_SETXATTR]		= "SETXATTR",
		[IORING_OP_FGETXATTR]		= "FGETXATTR",
		[IORING_OP_GETXATTR]		= "GETXATTR",
		[IORING_OP_SOCKET]		= "SOCKET",
	};
	const char *text = NULL;

	if (opcode < IORING_OP_LAST)
		text = opcode_names[opcode];

	return text ? text : "INVALID";
}
struct sock *io_uring_get_socket(struct file *file) struct sock *io_uring_get_socket(struct file *file)
{ {
#if defined(CONFIG_UNIX) #if defined(CONFIG_UNIX)
...@@ -4205,6 +4336,257 @@ static int io_renameat(struct io_kiocb *req, unsigned int issue_flags) ...@@ -4205,6 +4336,257 @@ static int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
return 0; return 0;
} }
static inline void __io_xattr_finish(struct io_kiocb *req)
{
struct io_xattr *ix = &req->xattr;
if (ix->filename)
putname(ix->filename);
kfree(ix->ctx.kname);
kvfree(ix->ctx.kvalue);
}
static void io_xattr_finish(struct io_kiocb *req, int ret)
{
req->flags &= ~REQ_F_NEED_CLEANUP;
__io_xattr_finish(req);
if (ret < 0)
req_set_fail(req);
io_req_complete(req, ret);
}
static int __io_getxattr_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
struct io_xattr *ix = &req->xattr;
const char __user *name;
int ret;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (unlikely(sqe->ioprio))
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
ix->filename = NULL;
ix->ctx.kvalue = NULL;
name = u64_to_user_ptr(READ_ONCE(sqe->addr));
ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2));
ix->ctx.size = READ_ONCE(sqe->len);
ix->ctx.flags = READ_ONCE(sqe->xattr_flags);
if (ix->ctx.flags)
return -EINVAL;
ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL);
if (!ix->ctx.kname)
return -ENOMEM;
ret = strncpy_from_user(ix->ctx.kname->name, name,
sizeof(ix->ctx.kname->name));
if (!ret || ret == sizeof(ix->ctx.kname->name))
ret = -ERANGE;
if (ret < 0) {
kfree(ix->ctx.kname);
return ret;
}
req->flags |= REQ_F_NEED_CLEANUP;
return 0;
}
/*
 * Prep handler for IORING_OP_FGETXATTR. The fd-based variant needs nothing
 * beyond the common getxattr prep, so it simply forwards to it and returns
 * its result.
 */
static int io_fgetxattr_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
return __io_getxattr_prep(req, sqe);
}
static int io_getxattr_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
struct io_xattr *ix = &req->xattr;
const char __user *path;
int ret;
ret = __io_getxattr_prep(req, sqe);
if (ret)
return ret;
path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
if (IS_ERR(ix->filename)) {
ret = PTR_ERR(ix->filename);
ix->filename = NULL;
}
return ret;
}
static int io_fgetxattr(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_xattr *ix = &req->xattr;
int ret;
if (issue_flags & IO_URING_F_NONBLOCK)
return -EAGAIN;
ret = do_getxattr(mnt_user_ns(req->file->f_path.mnt),
req->file->f_path.dentry,
&ix->ctx);
io_xattr_finish(req, ret);
return 0;
}
/*
 * Issue handler for IORING_OP_GETXATTR.
 *
 * Refuses to run in non-blocking context (returns -EAGAIN). Otherwise it
 * looks up the path prepared in io_getxattr_prep(), reads the attribute and
 * completes the request with the result, returning 0. A stale lookup is
 * retried once with LOOKUP_REVAL.
 */
static int io_getxattr(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_xattr *ix = &req->xattr;
	unsigned int lookup_flags = LOOKUP_FOLLOW;
	struct path path;
	int ret;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;

retry:
	ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL);
	if (!ret) {
		ret = do_getxattr(mnt_user_ns(path.mnt),
				  path.dentry,
				  &ix->ctx);

		path_put(&path);
		if (retry_estale(ret, lookup_flags)) {
			/*
			 * do_getxattr() leaves its value buffer in ctx.kvalue
			 * and allocates a new one on every call. Drop the
			 * buffer from the failed attempt before retrying so
			 * it is not overwritten and leaked.
			 */
			kvfree(ix->ctx.kvalue);
			ix->ctx.kvalue = NULL;

			lookup_flags |= LOOKUP_REVAL;
			goto retry;
		}
	}

	io_xattr_finish(req, ret);
	return 0;
}
static int __io_setxattr_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
struct io_xattr *ix = &req->xattr;
const char __user *name;
int ret;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (unlikely(sqe->ioprio))
return -EINVAL;
if (unlikely(req->flags & REQ_F_FIXED_FILE))
return -EBADF;
ix->filename = NULL;
name = u64_to_user_ptr(READ_ONCE(sqe->addr));
ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2));
ix->ctx.kvalue = NULL;
ix->ctx.size = READ_ONCE(sqe->len);
ix->ctx.flags = READ_ONCE(sqe->xattr_flags);
ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL);
if (!ix->ctx.kname)
return -ENOMEM;
ret = setxattr_copy(name, &ix->ctx);
if (ret) {
kfree(ix->ctx.kname);
return ret;
}
req->flags |= REQ_F_NEED_CLEANUP;
return 0;
}
static int io_setxattr_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
struct io_xattr *ix = &req->xattr;
const char __user *path;
int ret;
ret = __io_setxattr_prep(req, sqe);
if (ret)
return ret;
path = u64_to_user_ptr(READ_ONCE(sqe->addr3));
ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL);
if (IS_ERR(ix->filename)) {
ret = PTR_ERR(ix->filename);
ix->filename = NULL;
}
return ret;
}
/*
 * Prep handler for IORING_OP_FSETXATTR. The fd-based variant needs nothing
 * beyond the common setxattr prep, so it simply forwards to it and returns
 * its result.
 */
static int io_fsetxattr_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
return __io_setxattr_prep(req, sqe);
}
/*
 * Set the request's prepared attribute on @path.
 *
 * Write access to the mount is taken around the do_setxattr() call and
 * dropped afterwards. Returns the mnt_want_write() error if write access
 * cannot be obtained, otherwise the do_setxattr() result. @issue_flags is
 * unused here.
 */
static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags,
			 struct path *path)
{
	struct xattr_ctx *xc = &req->xattr.ctx;
	int err;

	err = mnt_want_write(path->mnt);
	if (err)
		return err;

	err = do_setxattr(mnt_user_ns(path->mnt), path->dentry, xc);
	mnt_drop_write(path->mnt);
	return err;
}
/*
 * Issue handler for IORING_OP_FSETXATTR.
 *
 * Refuses to run in non-blocking context (returns -EAGAIN). Otherwise it
 * sets the attribute on the request's file and completes the request with
 * the result, returning 0.
 */
static int io_fsetxattr(struct io_kiocb *req, unsigned int issue_flags)
{
	int res;

	if (issue_flags & IO_URING_F_NONBLOCK)
		return -EAGAIN;

	res = __io_setxattr(req, issue_flags, &req->file->f_path);

	io_xattr_finish(req, res);
	return 0;
}
static int io_setxattr(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_xattr *ix = &req->xattr;
unsigned int lookup_flags = LOOKUP_FOLLOW;
struct path path;
int ret;
if (issue_flags & IO_URING_F_NONBLOCK)
return -EAGAIN;
retry:
ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL);
if (!ret) {
ret = __io_setxattr(req, issue_flags, &path);
path_put(&path);
if (retry_estale(ret, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
}
}
io_xattr_finish(req, ret);
return 0;
}
static int io_unlinkat_prep(struct io_kiocb *req, static int io_unlinkat_prep(struct io_kiocb *req,
const struct io_uring_sqe *sqe) const struct io_uring_sqe *sqe)
{ {
...@@ -5760,6 +6142,62 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags) ...@@ -5760,6 +6142,62 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
return 0; return 0;
} }
static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_socket *sock = &req->sock;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->ioprio || sqe->addr || sqe->rw_flags || sqe->buf_index)
return -EINVAL;
sock->domain = READ_ONCE(sqe->fd);
sock->type = READ_ONCE(sqe->off);
sock->protocol = READ_ONCE(sqe->len);
sock->file_slot = READ_ONCE(sqe->file_index);
sock->nofile = rlimit(RLIMIT_NOFILE);
sock->flags = sock->type & ~SOCK_TYPE_MASK;
if (sock->file_slot && (sock->flags & SOCK_CLOEXEC))
return -EINVAL;
if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
return -EINVAL;
return 0;
}
/*
 * Issue handler for IORING_OP_SOCKET.
 *
 * Creates a socket file and either installs it into a newly reserved file
 * descriptor (file_slot == 0) or into fixed-file slot file_slot - 1.
 *
 * Returns a negative errno directly when no fd can be reserved, and -EAGAIN
 * when socket creation reports -EAGAIN in non-blocking context. In all other
 * cases the request is completed with the new fd, the fixed-file install
 * result or the creation error (-ERESTARTSYS is reported as -EINTR), and 0
 * is returned.
 */
static int io_socket(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_socket *s = &req->sock;
	const bool use_slot = s->file_slot != 0;
	struct file *sfile;
	int res, fd;

	/* reserve the descriptor up front so a failure costs no socket */
	if (!use_slot) {
		fd = __get_unused_fd_flags(s->flags, s->nofile);
		if (unlikely(fd < 0))
			return fd;
	}

	sfile = __sys_socket_file(s->domain, s->type, s->protocol);
	if (IS_ERR(sfile)) {
		res = PTR_ERR(sfile);
		if (!use_slot)
			put_unused_fd(fd);
		if (res == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
			return -EAGAIN;
		req_set_fail(req);
		if (res == -ERESTARTSYS)
			res = -EINTR;
	} else if (use_slot) {
		res = io_install_fixed_file(req, sfile, issue_flags,
					    s->file_slot - 1);
	} else {
		fd_install(fd, sfile);
		res = fd;
	}

	__io_req_complete(req, issue_flags, res, 0);
	return 0;
}
static int io_connect_prep_async(struct io_kiocb *req) static int io_connect_prep_async(struct io_kiocb *req)
{ {
struct io_async_connect *io = req->async_data; struct io_async_connect *io = req->async_data;
...@@ -5845,6 +6283,7 @@ IO_NETOP_PREP_ASYNC(sendmsg); ...@@ -5845,6 +6283,7 @@ IO_NETOP_PREP_ASYNC(sendmsg);
IO_NETOP_PREP_ASYNC(recvmsg); IO_NETOP_PREP_ASYNC(recvmsg);
IO_NETOP_PREP_ASYNC(connect); IO_NETOP_PREP_ASYNC(connect);
IO_NETOP_PREP(accept); IO_NETOP_PREP(accept);
IO_NETOP_PREP(socket);
IO_NETOP_FN(send); IO_NETOP_FN(send);
IO_NETOP_FN(recv); IO_NETOP_FN(recv);
#endif /* CONFIG_NET */ #endif /* CONFIG_NET */
...@@ -7147,6 +7586,16 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -7147,6 +7586,16 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return io_linkat_prep(req, sqe); return io_linkat_prep(req, sqe);
case IORING_OP_MSG_RING: case IORING_OP_MSG_RING:
return io_msg_ring_prep(req, sqe); return io_msg_ring_prep(req, sqe);
case IORING_OP_FSETXATTR:
return io_fsetxattr_prep(req, sqe);
case IORING_OP_SETXATTR:
return io_setxattr_prep(req, sqe);
case IORING_OP_FGETXATTR:
return io_fgetxattr_prep(req, sqe);
case IORING_OP_GETXATTR:
return io_getxattr_prep(req, sqe);
case IORING_OP_SOCKET:
return io_socket_prep(req, sqe);
} }
printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
...@@ -7296,6 +7745,12 @@ static void io_clean_op(struct io_kiocb *req) ...@@ -7296,6 +7745,12 @@ static void io_clean_op(struct io_kiocb *req)
if (req->statx.filename) if (req->statx.filename)
putname(req->statx.filename); putname(req->statx.filename);
break; break;
case IORING_OP_SETXATTR:
case IORING_OP_FSETXATTR:
case IORING_OP_GETXATTR:
case IORING_OP_FGETXATTR:
__io_xattr_finish(req);
break;
} }
} }
if ((req->flags & REQ_F_POLLED) && req->apoll) { if ((req->flags & REQ_F_POLLED) && req->apoll) {
...@@ -7452,6 +7907,21 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) ...@@ -7452,6 +7907,21 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
case IORING_OP_MSG_RING: case IORING_OP_MSG_RING:
ret = io_msg_ring(req, issue_flags); ret = io_msg_ring(req, issue_flags);
break; break;
case IORING_OP_FSETXATTR:
ret = io_fsetxattr(req, issue_flags);
break;
case IORING_OP_SETXATTR:
ret = io_setxattr(req, issue_flags);
break;
case IORING_OP_FGETXATTR:
ret = io_fgetxattr(req, issue_flags);
break;
case IORING_OP_GETXATTR:
ret = io_getxattr(req, issue_flags);
break;
case IORING_OP_SOCKET:
ret = io_socket(req, issue_flags);
break;
default: default:
ret = -EINVAL; ret = -EINVAL;
break; break;
...@@ -12025,6 +12495,7 @@ static int __init io_uring_init(void) ...@@ -12025,6 +12495,7 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(42, __u16, personality); BUILD_BUG_SQE_ELEM(42, __u16, personality);
BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
BUILD_BUG_SQE_ELEM(44, __u32, file_index); BUILD_BUG_SQE_ELEM(44, __u32, file_index);
BUILD_BUG_SQE_ELEM(48, __u64, addr3);
BUILD_BUG_ON(sizeof(struct io_uring_files_update) != BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
sizeof(struct io_uring_rsrc_update)); sizeof(struct io_uring_rsrc_update));
......
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include "internal.h"
static const char * static const char *
strcmp_prefix(const char *a, const char *a_prefix) strcmp_prefix(const char *a, const char *a_prefix)
{ {
...@@ -539,44 +541,76 @@ EXPORT_SYMBOL_GPL(vfs_removexattr); ...@@ -539,44 +541,76 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
/* /*
* Extended attribute SET operations * Extended attribute SET operations
*/ */
static long
setxattr(struct user_namespace *mnt_userns, struct dentry *d, int setxattr_copy(const char __user *name, struct xattr_ctx *ctx)
const char __user *name, const void __user *value, size_t size,
int flags)
{ {
int error; int error;
void *kvalue = NULL;
char kname[XATTR_NAME_MAX + 1];
if (flags & ~(XATTR_CREATE|XATTR_REPLACE)) if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE))
return -EINVAL; return -EINVAL;
error = strncpy_from_user(kname, name, sizeof(kname)); error = strncpy_from_user(ctx->kname->name, name,
if (error == 0 || error == sizeof(kname)) sizeof(ctx->kname->name));
error = -ERANGE; if (error == 0 || error == sizeof(ctx->kname->name))
return -ERANGE;
if (error < 0) if (error < 0)
return error; return error;
if (size) { error = 0;
if (size > XATTR_SIZE_MAX) if (ctx->size) {
if (ctx->size > XATTR_SIZE_MAX)
return -E2BIG; return -E2BIG;
kvalue = kvmalloc(size, GFP_KERNEL);
if (!kvalue) ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size);
return -ENOMEM; if (IS_ERR(ctx->kvalue)) {
if (copy_from_user(kvalue, value, size)) { error = PTR_ERR(ctx->kvalue);
error = -EFAULT; ctx->kvalue = NULL;
goto out;
} }
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d),
kvalue, size);
} }
error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags); return error;
out: }
kvfree(kvalue);
/*
 * Run posix_acl_fix_xattr_from_user() on the value in ctx->kvalue when the
 * attribute is one of the two POSIX ACL attributes and the value is
 * non-empty. Any other attribute is left untouched.
 */
static void setxattr_convert(struct user_namespace *mnt_userns,
			     struct dentry *d, struct xattr_ctx *ctx)
{
	const char *name = ctx->kname->name;

	if (!ctx->size)
		return;

	if (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) != 0 &&
	    strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) != 0)
		return;

	posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d),
				      ctx->kvalue, ctx->size);
}
/*
 * Set the attribute described by @ctx on @dentry.
 *
 * The value is first passed through setxattr_convert() and then handed to
 * vfs_setxattr(), whose result is returned. ctx->kvalue is not freed here.
 */
int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
		struct xattr_ctx *ctx)
{
	const char *name;

	setxattr_convert(mnt_userns, dentry, ctx);

	name = ctx->kname->name;
	return vfs_setxattr(mnt_userns, dentry, name,
			    ctx->kvalue, ctx->size, ctx->flags);
}
static long
setxattr(struct user_namespace *mnt_userns, struct dentry *d,
const char __user *name, const void __user *value, size_t size,
int flags)
{
struct xattr_name kname;
struct xattr_ctx ctx = {
.cvalue = value,
.kvalue = NULL,
.size = size,
.kname = &kname,
.flags = flags,
};
int error;
error = setxattr_copy(name, &ctx);
if (error)
return error;
error = do_setxattr(mnt_userns, d, &ctx);
kvfree(ctx.kvalue);
return error; return error;
} }
...@@ -642,44 +676,61 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, ...@@ -642,44 +676,61 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
/* /*
* Extended attribute GET operations * Extended attribute GET operations
*/ */
static ssize_t ssize_t
getxattr(struct user_namespace *mnt_userns, struct dentry *d, do_getxattr(struct user_namespace *mnt_userns, struct dentry *d,
const char __user *name, void __user *value, size_t size) struct xattr_ctx *ctx)
{ {
ssize_t error; ssize_t error;
void *kvalue = NULL; char *kname = ctx->kname->name;
char kname[XATTR_NAME_MAX + 1];
error = strncpy_from_user(kname, name, sizeof(kname));
if (error == 0 || error == sizeof(kname))
error = -ERANGE;
if (error < 0)
return error;
if (size) { if (ctx->size) {
if (size > XATTR_SIZE_MAX) if (ctx->size > XATTR_SIZE_MAX)
size = XATTR_SIZE_MAX; ctx->size = XATTR_SIZE_MAX;
kvalue = kvzalloc(size, GFP_KERNEL); ctx->kvalue = kvzalloc(ctx->size, GFP_KERNEL);
if (!kvalue) if (!ctx->kvalue)
return -ENOMEM; return -ENOMEM;
} }
error = vfs_getxattr(mnt_userns, d, kname, kvalue, size); error = vfs_getxattr(mnt_userns, d, kname, ctx->kvalue, ctx->size);
if (error > 0) { if (error > 0) {
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d), posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d),
kvalue, error); ctx->kvalue, error);
if (size && copy_to_user(value, kvalue, error)) if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error))
error = -EFAULT; error = -EFAULT;
} else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { } else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) {
/* The file system tried to return a value bigger /* The file system tried to return a value bigger
than XATTR_SIZE_MAX bytes. Not possible. */ than XATTR_SIZE_MAX bytes. Not possible. */
error = -E2BIG; error = -E2BIG;
} }
kvfree(kvalue); return error;
}
static ssize_t
getxattr(struct user_namespace *mnt_userns, struct dentry *d,
const char __user *name, void __user *value, size_t size)
{
ssize_t error;
struct xattr_name kname;
struct xattr_ctx ctx = {
.value = value,
.kvalue = NULL,
.size = size,
.kname = &kname,
.flags = 0,
};
error = strncpy_from_user(kname.name, name, sizeof(kname.name));
if (error == 0 || error == sizeof(kname.name))
error = -ERANGE;
if (error < 0)
return error;
error = do_getxattr(mnt_userns, d, &ctx);
kvfree(ctx.kvalue);
return error; return error;
} }
......
...@@ -10,6 +10,7 @@ struct sock *io_uring_get_socket(struct file *file); ...@@ -10,6 +10,7 @@ struct sock *io_uring_get_socket(struct file *file);
void __io_uring_cancel(bool cancel_all); void __io_uring_cancel(bool cancel_all);
void __io_uring_free(struct task_struct *tsk); void __io_uring_free(struct task_struct *tsk);
void io_uring_unreg_ringfd(void); void io_uring_unreg_ringfd(void);
const char *io_uring_get_opcode(u8 opcode);
static inline void io_uring_files_cancel(void) static inline void io_uring_files_cancel(void)
{ {
...@@ -42,6 +43,10 @@ static inline void io_uring_files_cancel(void) ...@@ -42,6 +43,10 @@ static inline void io_uring_files_cancel(void)
static inline void io_uring_free(struct task_struct *tsk) static inline void io_uring_free(struct task_struct *tsk)
{ {
} }
/*
 * Stub variant of io_uring_get_opcode(): ignores @opcode and always returns
 * an empty string.
 * NOTE(review): presumably the build without io_uring support - confirm
 * against the enclosing #if, which is outside this view.
 */
static inline const char *io_uring_get_opcode(u8 opcode)
{
return "";
}
#endif #endif
#endif #endif
...@@ -434,6 +434,7 @@ extern struct file *do_accept(struct file *file, unsigned file_flags, ...@@ -434,6 +434,7 @@ extern struct file *do_accept(struct file *file, unsigned file_flags,
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags); int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol); extern int __sys_socket(int family, int type, int protocol);
extern struct file *__sys_socket_file(int family, int type, int protocol);
extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
int addrlen, int file_flags); int addrlen, int file_flags);
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <linux/tracepoint.h> #include <linux/tracepoint.h>
#include <uapi/linux/io_uring.h> #include <uapi/linux/io_uring.h>
#include <linux/io_uring.h>
struct io_wq_work; struct io_wq_work;
...@@ -169,8 +170,9 @@ TRACE_EVENT(io_uring_queue_async_work, ...@@ -169,8 +170,9 @@ TRACE_EVENT(io_uring_queue_async_work,
__entry->rw = rw; __entry->rw = rw;
), ),
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, flags 0x%x, %s queue, work %p", TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p",
__entry->ctx, __entry->req, __entry->user_data, __entry->opcode, __entry->ctx, __entry->req, __entry->user_data,
io_uring_get_opcode(__entry->opcode),
__entry->flags, __entry->rw ? "hashed" : "normal", __entry->work) __entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
); );
...@@ -205,8 +207,9 @@ TRACE_EVENT(io_uring_defer, ...@@ -205,8 +207,9 @@ TRACE_EVENT(io_uring_defer,
__entry->opcode = opcode; __entry->opcode = opcode;
), ),
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d", TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s",
__entry->ctx, __entry->req, __entry->data, __entry->opcode) __entry->ctx, __entry->req, __entry->data,
io_uring_get_opcode(__entry->opcode))
); );
/** /**
...@@ -305,9 +308,9 @@ TRACE_EVENT(io_uring_fail_link, ...@@ -305,9 +308,9 @@ TRACE_EVENT(io_uring_fail_link,
__entry->link = link; __entry->link = link;
), ),
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, link %p", TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, link %p",
__entry->ctx, __entry->req, __entry->user_data, __entry->opcode, __entry->ctx, __entry->req, __entry->user_data,
__entry->link) io_uring_get_opcode(__entry->opcode), __entry->link)
); );
/** /**
...@@ -389,9 +392,9 @@ TRACE_EVENT(io_uring_submit_sqe, ...@@ -389,9 +392,9 @@ TRACE_EVENT(io_uring_submit_sqe,
__entry->sq_thread = sq_thread; __entry->sq_thread = sq_thread;
), ),
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, flags 0x%x, " TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, "
"non block %d, sq_thread %d", __entry->ctx, __entry->req, "non block %d, sq_thread %d", __entry->ctx, __entry->req,
__entry->user_data, __entry->opcode, __entry->user_data, io_uring_get_opcode(__entry->opcode),
__entry->flags, __entry->force_nonblock, __entry->sq_thread) __entry->flags, __entry->force_nonblock, __entry->sq_thread)
); );
...@@ -433,8 +436,9 @@ TRACE_EVENT(io_uring_poll_arm, ...@@ -433,8 +436,9 @@ TRACE_EVENT(io_uring_poll_arm,
__entry->events = events; __entry->events = events;
), ),
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask 0x%x, events 0x%x", TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask 0x%x, events 0x%x",
__entry->ctx, __entry->req, __entry->user_data, __entry->opcode, __entry->ctx, __entry->req, __entry->user_data,
io_uring_get_opcode(__entry->opcode),
__entry->mask, __entry->events) __entry->mask, __entry->events)
); );
...@@ -470,8 +474,9 @@ TRACE_EVENT(io_uring_task_add, ...@@ -470,8 +474,9 @@ TRACE_EVENT(io_uring_task_add,
__entry->mask = mask; __entry->mask = mask;
), ),
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask %x", TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask %x",
__entry->ctx, __entry->req, __entry->user_data, __entry->opcode, __entry->ctx, __entry->req, __entry->user_data,
io_uring_get_opcode(__entry->opcode),
__entry->mask) __entry->mask)
); );
...@@ -506,7 +511,7 @@ TRACE_EVENT(io_uring_req_failed, ...@@ -506,7 +511,7 @@ TRACE_EVENT(io_uring_req_failed,
__field( u16, personality ) __field( u16, personality )
__field( u32, file_index ) __field( u32, file_index )
__field( u64, pad1 ) __field( u64, pad1 )
__field( u64, pad2 ) __field( u64, addr3 )
__field( int, error ) __field( int, error )
), ),
...@@ -525,22 +530,24 @@ TRACE_EVENT(io_uring_req_failed, ...@@ -525,22 +530,24 @@ TRACE_EVENT(io_uring_req_failed,
__entry->personality = sqe->personality; __entry->personality = sqe->personality;
__entry->file_index = sqe->file_index; __entry->file_index = sqe->file_index;
__entry->pad1 = sqe->__pad2[0]; __entry->pad1 = sqe->__pad2[0];
__entry->pad2 = sqe->__pad2[1]; __entry->addr3 = sqe->addr3;
__entry->error = error; __entry->error = error;
), ),
TP_printk("ring %p, req %p, user_data 0x%llx, " TP_printk("ring %p, req %p, user_data 0x%llx, "
"op %d, flags 0x%x, prio=%d, off=%llu, addr=%llu, " "opcode %s, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
"len=%u, rw_flags=0x%x, buf_index=%d, " "len=%u, rw_flags=0x%x, buf_index=%d, "
"personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d", "personality=%d, file_index=%d, pad=0x%llx, addr3=%llx, "
"error=%d",
__entry->ctx, __entry->req, __entry->user_data, __entry->ctx, __entry->req, __entry->user_data,
__entry->opcode, __entry->flags, __entry->ioprio, io_uring_get_opcode(__entry->opcode),
__entry->flags, __entry->ioprio,
(unsigned long long)__entry->off, (unsigned long long)__entry->off,
(unsigned long long) __entry->addr, __entry->len, (unsigned long long) __entry->addr, __entry->len,
__entry->op_flags, __entry->op_flags,
__entry->buf_index, __entry->personality, __entry->file_index, __entry->buf_index, __entry->personality, __entry->file_index,
(unsigned long long) __entry->pad1, (unsigned long long) __entry->pad1,
(unsigned long long) __entry->pad2, __entry->error) (unsigned long long) __entry->addr3, __entry->error)
); );
......
...@@ -45,6 +45,7 @@ struct io_uring_sqe { ...@@ -45,6 +45,7 @@ struct io_uring_sqe {
__u32 rename_flags; __u32 rename_flags;
__u32 unlink_flags; __u32 unlink_flags;
__u32 hardlink_flags; __u32 hardlink_flags;
__u32 xattr_flags;
}; };
__u64 user_data; /* data to be passed back at completion time */ __u64 user_data; /* data to be passed back at completion time */
/* pack this to avoid bogus arm OABI complaints */ /* pack this to avoid bogus arm OABI complaints */
...@@ -60,7 +61,8 @@ struct io_uring_sqe { ...@@ -60,7 +61,8 @@ struct io_uring_sqe {
__s32 splice_fd_in; __s32 splice_fd_in;
__u32 file_index; __u32 file_index;
}; };
__u64 __pad2[2]; __u64 addr3;
__u64 __pad2[1];
}; };
enum { enum {
...@@ -117,7 +119,7 @@ enum { ...@@ -117,7 +119,7 @@ enum {
*/ */
#define IORING_SETUP_TASKRUN_FLAG (1U << 9) #define IORING_SETUP_TASKRUN_FLAG (1U << 9)
enum { enum io_uring_op {
IORING_OP_NOP, IORING_OP_NOP,
IORING_OP_READV, IORING_OP_READV,
IORING_OP_WRITEV, IORING_OP_WRITEV,
...@@ -159,6 +161,11 @@ enum { ...@@ -159,6 +161,11 @@ enum {
IORING_OP_SYMLINKAT, IORING_OP_SYMLINKAT,
IORING_OP_LINKAT, IORING_OP_LINKAT,
IORING_OP_MSG_RING, IORING_OP_MSG_RING,
IORING_OP_FSETXATTR,
IORING_OP_SETXATTR,
IORING_OP_FGETXATTR,
IORING_OP_GETXATTR,
IORING_OP_SOCKET,
/* this goes last, obviously */ /* this goes last, obviously */
IORING_OP_LAST, IORING_OP_LAST,
......
...@@ -504,7 +504,7 @@ static int sock_map_fd(struct socket *sock, int flags) ...@@ -504,7 +504,7 @@ static int sock_map_fd(struct socket *sock, int flags)
struct socket *sock_from_file(struct file *file) struct socket *sock_from_file(struct file *file)
{ {
if (file->f_op == &socket_file_ops) if (file->f_op == &socket_file_ops)
return file->private_data; /* set in sock_map_fd */ return file->private_data; /* set in sock_alloc_file */
return NULL; return NULL;
} }
...@@ -1538,11 +1538,10 @@ int sock_create_kern(struct net *net, int family, int type, int protocol, struct ...@@ -1538,11 +1538,10 @@ int sock_create_kern(struct net *net, int family, int type, int protocol, struct
} }
EXPORT_SYMBOL(sock_create_kern); EXPORT_SYMBOL(sock_create_kern);
int __sys_socket(int family, int type, int protocol) static struct socket *__sys_socket_create(int family, int type, int protocol)
{ {
int retval;
struct socket *sock; struct socket *sock;
int flags; int retval;
/* Check the SOCK_* constants for consistency. */ /* Check the SOCK_* constants for consistency. */
BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
...@@ -1550,17 +1549,50 @@ int __sys_socket(int family, int type, int protocol) ...@@ -1550,17 +1549,50 @@ int __sys_socket(int family, int type, int protocol)
BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
flags = type & ~SOCK_TYPE_MASK; if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) return ERR_PTR(-EINVAL);
return -EINVAL;
type &= SOCK_TYPE_MASK; type &= SOCK_TYPE_MASK;
retval = sock_create(family, type, protocol, &sock);
if (retval < 0)
return ERR_PTR(retval);
return sock;
}
struct file *__sys_socket_file(int family, int type, int protocol)
{
struct socket *sock;
struct file *file;
int flags;
sock = __sys_socket_create(family, type, protocol);
if (IS_ERR(sock))
return ERR_CAST(sock);
flags = type & ~SOCK_TYPE_MASK;
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK; flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
retval = sock_create(family, type, protocol, &sock); file = sock_alloc_file(sock, flags, NULL);
if (retval < 0) if (IS_ERR(file))
return retval; sock_release(sock);
return file;
}
int __sys_socket(int family, int type, int protocol)
{
struct socket *sock;
int flags;
sock = __sys_socket_create(family, type, protocol);
if (IS_ERR(sock))
return PTR_ERR(sock);
flags = type & ~SOCK_TYPE_MASK;
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment