Commit b5ba65df authored by Jens Axboe

Merge branch 'for-5.19/io_uring-socket' into for-5.19/io_uring-passthrough

* for-5.19/io_uring-socket:
  io_uring: use the text representation of ops in trace
  io_uring: rename op -> opcode
  io_uring: add io_uring_get_opcode
  io_uring: add type to op enum
  io_uring: add socket(2) support
  net: add __sys_socket_file()
  io_uring: fix trace for reduced sqe padding
  io_uring: add fgetxattr and getxattr support
  io_uring: add fsetxattr and setxattr support
  fs: split off do_getxattr from getxattr
  fs: split off setxattr_copy and do_setxattr function from setxattr
parents 13086899 033b87d2
...@@ -191,3 +191,32 @@ long splice_file_to_pipe(struct file *in, ...@@ -191,3 +191,32 @@ long splice_file_to_pipe(struct file *in,
struct pipe_inode_info *opipe, struct pipe_inode_info *opipe,
loff_t *offset, loff_t *offset,
size_t len, unsigned int flags); size_t len, unsigned int flags);
/*
* fs/xattr.c:
*/
/*
 * Kernel-side buffer for an extended attribute name: XATTR_NAME_MAX bytes
 * plus one extra byte (presumably for the terminating NUL).
 */
struct xattr_name {
	char name[XATTR_NAME_MAX + 1];
};
/*
 * Arguments and working state for a single xattr get or set request,
 * passed between setxattr()/getxattr() and the helpers setxattr_copy(),
 * do_setxattr() and do_getxattr().
 */
struct xattr_ctx {
	/* Value of attribute */
	union {
		/* user-space source buffer, used on the set path */
		const void __user *cvalue;
		/* user-space destination buffer, used on the get path */
		void __user *value;
	};
	/*
	 * Kernel copy of the value. Filled in by setxattr_copy() or
	 * do_getxattr(); the callers in fs/xattr.c kvfree() it afterwards.
	 */
	void *kvalue;
	/* Size of the value in bytes (do_getxattr() clamps it to XATTR_SIZE_MAX) */
	size_t size;
	/* Attribute name */
	struct xattr_name *kname;
	/* setxattr_copy() accepts only XATTR_CREATE/XATTR_REPLACE; getxattr() passes 0 */
	unsigned int flags;
};
/*
 * Read the attribute named by ctx->kname->name from @d. When ctx->size is
 * non-zero a kernel buffer is allocated into ctx->kvalue and a positive
 * result is copied out to ctx->value. ctx->kvalue is not freed here; the
 * caller must kvfree() it. Returns the vfs_getxattr() result or a negative
 * errno.
 */
ssize_t do_getxattr(struct user_namespace *mnt_userns,
		    struct dentry *d,
		    struct xattr_ctx *ctx);
/*
 * Validate ctx->flags, copy the user-supplied @name into ctx->kname and,
 * when ctx->size is non-zero, duplicate the user value ctx->cvalue into
 * ctx->kvalue. Returns 0 on success or a negative errno.
 */
int setxattr_copy(const char __user *name, struct xattr_ctx *ctx);
/*
 * Set the attribute described by @ctx on @dentry via vfs_setxattr().
 * ctx->kvalue is not freed here; the caller must kvfree() it.
 */
int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
		struct xattr_ctx *ctx);
This diff is collapsed.
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include "internal.h"
static const char * static const char *
strcmp_prefix(const char *a, const char *a_prefix) strcmp_prefix(const char *a, const char *a_prefix)
{ {
...@@ -539,44 +541,76 @@ EXPORT_SYMBOL_GPL(vfs_removexattr); ...@@ -539,44 +541,76 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
/* /*
* Extended attribute SET operations * Extended attribute SET operations
*/ */
static long
setxattr(struct user_namespace *mnt_userns, struct dentry *d, int setxattr_copy(const char __user *name, struct xattr_ctx *ctx)
const char __user *name, const void __user *value, size_t size,
int flags)
{ {
int error; int error;
void *kvalue = NULL;
char kname[XATTR_NAME_MAX + 1];
if (flags & ~(XATTR_CREATE|XATTR_REPLACE)) if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE))
return -EINVAL; return -EINVAL;
error = strncpy_from_user(kname, name, sizeof(kname)); error = strncpy_from_user(ctx->kname->name, name,
if (error == 0 || error == sizeof(kname)) sizeof(ctx->kname->name));
error = -ERANGE; if (error == 0 || error == sizeof(ctx->kname->name))
return -ERANGE;
if (error < 0) if (error < 0)
return error; return error;
if (size) { error = 0;
if (size > XATTR_SIZE_MAX) if (ctx->size) {
if (ctx->size > XATTR_SIZE_MAX)
return -E2BIG; return -E2BIG;
kvalue = kvmalloc(size, GFP_KERNEL);
if (!kvalue) ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size);
return -ENOMEM; if (IS_ERR(ctx->kvalue)) {
if (copy_from_user(kvalue, value, size)) { error = PTR_ERR(ctx->kvalue);
error = -EFAULT; ctx->kvalue = NULL;
goto out;
} }
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d),
kvalue, size);
} }
error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags); return error;
out: }
kvfree(kvalue);
/*
 * Pass a POSIX ACL value through posix_acl_fix_xattr_from_user() before it
 * is stored. Only requests that carry a value (ctx->size != 0) and whose
 * name is one of the two POSIX ACL attribute names are touched; everything
 * else is left as is.
 */
static void setxattr_convert(struct user_namespace *mnt_userns,
			     struct dentry *d, struct xattr_ctx *ctx)
{
	/* No value supplied: nothing to fix up. */
	if (!ctx->size)
		return;

	/* Neither of the POSIX ACL attributes: leave the value alone. */
	if (strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_ACCESS) != 0 &&
	    strcmp(ctx->kname->name, XATTR_NAME_POSIX_ACL_DEFAULT) != 0)
		return;

	posix_acl_fix_xattr_from_user(mnt_userns, d_inode(d),
				      ctx->kvalue, ctx->size);
}
/*
 * Set the attribute described by @ctx on @dentry.
 *
 * The value is first run through setxattr_convert() and then handed to
 * vfs_setxattr() together with the name, size and flags from @ctx.
 * ctx->kvalue is not freed here. Returns whatever vfs_setxattr() returns.
 */
int do_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
		struct xattr_ctx *ctx)
{
	int ret;

	setxattr_convert(mnt_userns, dentry, ctx);

	ret = vfs_setxattr(mnt_userns, dentry, ctx->kname->name,
			   ctx->kvalue, ctx->size, ctx->flags);
	return ret;
}
/*
 * Set an extended attribute on @d from user-space arguments.
 *
 * Builds an xattr_ctx on the stack, lets setxattr_copy() validate the flags
 * and pull the name and value in from user space, then applies the change
 * with do_setxattr(). The kernel copy of the value is freed here once
 * do_setxattr() has run. Returns 0 on success or a negative errno.
 */
static long
setxattr(struct user_namespace *mnt_userns, struct dentry *d,
	 const char __user *name, const void __user *value, size_t size,
	 int flags)
{
	struct xattr_name name_buf;
	struct xattr_ctx req = {
		.kname	= &name_buf,
		.cvalue	= value,
		.size	= size,
		.flags	= flags,
		.kvalue	= NULL,
	};
	int ret = setxattr_copy(name, &req);

	if (!ret) {
		ret = do_setxattr(mnt_userns, d, &req);
		kvfree(req.kvalue);
	}
	return ret;
}
...@@ -642,44 +676,61 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, ...@@ -642,44 +676,61 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
/* /*
* Extended attribute GET operations * Extended attribute GET operations
*/ */
static ssize_t ssize_t
getxattr(struct user_namespace *mnt_userns, struct dentry *d, do_getxattr(struct user_namespace *mnt_userns, struct dentry *d,
const char __user *name, void __user *value, size_t size) struct xattr_ctx *ctx)
{ {
ssize_t error; ssize_t error;
void *kvalue = NULL; char *kname = ctx->kname->name;
char kname[XATTR_NAME_MAX + 1];
error = strncpy_from_user(kname, name, sizeof(kname));
if (error == 0 || error == sizeof(kname))
error = -ERANGE;
if (error < 0)
return error;
if (size) { if (ctx->size) {
if (size > XATTR_SIZE_MAX) if (ctx->size > XATTR_SIZE_MAX)
size = XATTR_SIZE_MAX; ctx->size = XATTR_SIZE_MAX;
kvalue = kvzalloc(size, GFP_KERNEL); ctx->kvalue = kvzalloc(ctx->size, GFP_KERNEL);
if (!kvalue) if (!ctx->kvalue)
return -ENOMEM; return -ENOMEM;
} }
error = vfs_getxattr(mnt_userns, d, kname, kvalue, size); error = vfs_getxattr(mnt_userns, d, kname, ctx->kvalue, ctx->size);
if (error > 0) { if (error > 0) {
if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
(strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0)) (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d), posix_acl_fix_xattr_to_user(mnt_userns, d_inode(d),
kvalue, error); ctx->kvalue, error);
if (size && copy_to_user(value, kvalue, error)) if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error))
error = -EFAULT; error = -EFAULT;
} else if (error == -ERANGE && size >= XATTR_SIZE_MAX) { } else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) {
/* The file system tried to return a value bigger /* The file system tried to return a value bigger
than XATTR_SIZE_MAX bytes. Not possible. */ than XATTR_SIZE_MAX bytes. Not possible. */
error = -E2BIG; error = -E2BIG;
} }
kvfree(kvalue); return error;
}
/*
 * Read an extended attribute of @d into a user-space buffer.
 *
 * Copies the attribute name in from user space (an empty name, or one that
 * fills the whole name buffer, is rejected with -ERANGE), then lets
 * do_getxattr() fetch the value and copy it out to @value. The kernel
 * buffer do_getxattr() may have allocated is freed here. Returns the
 * do_getxattr() result or a negative errno.
 */
static ssize_t
getxattr(struct user_namespace *mnt_userns, struct dentry *d,
	 const char __user *name, void __user *value, size_t size)
{
	struct xattr_name name_buf;
	struct xattr_ctx req = {
		.kname	= &name_buf,
		.value	= value,
		.size	= size,
		.flags	= 0,
		.kvalue	= NULL,
	};
	ssize_t ret;

	ret = strncpy_from_user(name_buf.name, name, sizeof(name_buf.name));
	if (ret < 0)
		return ret;
	if (ret == 0 || ret == sizeof(name_buf.name))
		return -ERANGE;

	ret = do_getxattr(mnt_userns, d, &req);
	kvfree(req.kvalue);
	return ret;
}
......
...@@ -10,6 +10,7 @@ struct sock *io_uring_get_socket(struct file *file); ...@@ -10,6 +10,7 @@ struct sock *io_uring_get_socket(struct file *file);
void __io_uring_cancel(bool cancel_all); void __io_uring_cancel(bool cancel_all);
void __io_uring_free(struct task_struct *tsk); void __io_uring_free(struct task_struct *tsk);
void io_uring_unreg_ringfd(void); void io_uring_unreg_ringfd(void);
const char *io_uring_get_opcode(u8 opcode);
static inline void io_uring_files_cancel(void) static inline void io_uring_files_cancel(void)
{ {
...@@ -42,6 +43,10 @@ static inline void io_uring_files_cancel(void) ...@@ -42,6 +43,10 @@ static inline void io_uring_files_cancel(void)
static inline void io_uring_free(struct task_struct *tsk) static inline void io_uring_free(struct task_struct *tsk)
{ {
} }
/*
 * Stub variant of io_uring_get_opcode(): ignores @opcode and always returns
 * an empty string. NOTE(review): presumably the fallback used when io_uring
 * is not built in -- the guarding #if is outside this view; confirm.
 */
static inline const char *io_uring_get_opcode(u8 opcode)
{
	return "";
}
#endif #endif
#endif #endif
...@@ -434,6 +434,7 @@ extern struct file *do_accept(struct file *file, unsigned file_flags, ...@@ -434,6 +434,7 @@ extern struct file *do_accept(struct file *file, unsigned file_flags,
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags); int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol); extern int __sys_socket(int family, int type, int protocol);
extern struct file *__sys_socket_file(int family, int type, int protocol);
extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
int addrlen, int file_flags); int addrlen, int file_flags);
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <linux/tracepoint.h> #include <linux/tracepoint.h>
#include <uapi/linux/io_uring.h> #include <uapi/linux/io_uring.h>
#include <linux/io_uring.h>
struct io_wq_work; struct io_wq_work;
...@@ -169,8 +170,9 @@ TRACE_EVENT(io_uring_queue_async_work, ...@@ -169,8 +170,9 @@ TRACE_EVENT(io_uring_queue_async_work,
__entry->rw = rw; __entry->rw = rw;
), ),
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, flags 0x%x, %s queue, work %p", TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p",
__entry->ctx, __entry->req, __entry->user_data, __entry->opcode, __entry->ctx, __entry->req, __entry->user_data,
io_uring_get_opcode(__entry->opcode),
__entry->flags, __entry->rw ? "hashed" : "normal", __entry->work) __entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
); );
...@@ -205,8 +207,9 @@ TRACE_EVENT(io_uring_defer, ...@@ -205,8 +207,9 @@ TRACE_EVENT(io_uring_defer,
__entry->opcode = opcode; __entry->opcode = opcode;
), ),
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d", TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s",
__entry->ctx, __entry->req, __entry->data, __entry->opcode) __entry->ctx, __entry->req, __entry->data,
io_uring_get_opcode(__entry->opcode))
); );
/** /**
...@@ -305,9 +308,9 @@ TRACE_EVENT(io_uring_fail_link, ...@@ -305,9 +308,9 @@ TRACE_EVENT(io_uring_fail_link,
__entry->link = link; __entry->link = link;
), ),
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %d, link %p", TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, link %p",
__entry->ctx, __entry->req, __entry->user_data, __entry->opcode, __entry->ctx, __entry->req, __entry->user_data,
__entry->link) io_uring_get_opcode(__entry->opcode), __entry->link)
); );
/** /**
...@@ -389,9 +392,9 @@ TRACE_EVENT(io_uring_submit_sqe, ...@@ -389,9 +392,9 @@ TRACE_EVENT(io_uring_submit_sqe,
__entry->sq_thread = sq_thread; __entry->sq_thread = sq_thread;
), ),
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, flags 0x%x, " TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, "
"non block %d, sq_thread %d", __entry->ctx, __entry->req, "non block %d, sq_thread %d", __entry->ctx, __entry->req,
__entry->user_data, __entry->opcode, __entry->user_data, io_uring_get_opcode(__entry->opcode),
__entry->flags, __entry->force_nonblock, __entry->sq_thread) __entry->flags, __entry->force_nonblock, __entry->sq_thread)
); );
...@@ -433,8 +436,9 @@ TRACE_EVENT(io_uring_poll_arm, ...@@ -433,8 +436,9 @@ TRACE_EVENT(io_uring_poll_arm,
__entry->events = events; __entry->events = events;
), ),
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask 0x%x, events 0x%x", TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask 0x%x, events 0x%x",
__entry->ctx, __entry->req, __entry->user_data, __entry->opcode, __entry->ctx, __entry->req, __entry->user_data,
io_uring_get_opcode(__entry->opcode),
__entry->mask, __entry->events) __entry->mask, __entry->events)
); );
...@@ -470,8 +474,9 @@ TRACE_EVENT(io_uring_task_add, ...@@ -470,8 +474,9 @@ TRACE_EVENT(io_uring_task_add,
__entry->mask = mask; __entry->mask = mask;
), ),
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %d, mask %x", TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask %x",
__entry->ctx, __entry->req, __entry->user_data, __entry->opcode, __entry->ctx, __entry->req, __entry->user_data,
io_uring_get_opcode(__entry->opcode),
__entry->mask) __entry->mask)
); );
...@@ -506,7 +511,7 @@ TRACE_EVENT(io_uring_req_failed, ...@@ -506,7 +511,7 @@ TRACE_EVENT(io_uring_req_failed,
__field( u16, personality ) __field( u16, personality )
__field( u32, file_index ) __field( u32, file_index )
__field( u64, pad1 ) __field( u64, pad1 )
__field( u64, pad2 ) __field( u64, addr3 )
__field( int, error ) __field( int, error )
), ),
...@@ -525,22 +530,24 @@ TRACE_EVENT(io_uring_req_failed, ...@@ -525,22 +530,24 @@ TRACE_EVENT(io_uring_req_failed,
__entry->personality = sqe->personality; __entry->personality = sqe->personality;
__entry->file_index = sqe->file_index; __entry->file_index = sqe->file_index;
__entry->pad1 = sqe->__pad2[0]; __entry->pad1 = sqe->__pad2[0];
__entry->pad2 = sqe->__pad2[1]; __entry->addr3 = sqe->addr3;
__entry->error = error; __entry->error = error;
), ),
TP_printk("ring %p, req %p, user_data 0x%llx, " TP_printk("ring %p, req %p, user_data 0x%llx, "
"op %d, flags 0x%x, prio=%d, off=%llu, addr=%llu, " "opcode %s, flags 0x%x, prio=%d, off=%llu, addr=%llu, "
"len=%u, rw_flags=0x%x, buf_index=%d, " "len=%u, rw_flags=0x%x, buf_index=%d, "
"personality=%d, file_index=%d, pad=0x%llx/%llx, error=%d", "personality=%d, file_index=%d, pad=0x%llx, addr3=%llx, "
"error=%d",
__entry->ctx, __entry->req, __entry->user_data, __entry->ctx, __entry->req, __entry->user_data,
__entry->opcode, __entry->flags, __entry->ioprio, io_uring_get_opcode(__entry->opcode),
__entry->flags, __entry->ioprio,
(unsigned long long)__entry->off, (unsigned long long)__entry->off,
(unsigned long long) __entry->addr, __entry->len, (unsigned long long) __entry->addr, __entry->len,
__entry->op_flags, __entry->op_flags,
__entry->buf_index, __entry->personality, __entry->file_index, __entry->buf_index, __entry->personality, __entry->file_index,
(unsigned long long) __entry->pad1, (unsigned long long) __entry->pad1,
(unsigned long long) __entry->pad2, __entry->error) (unsigned long long) __entry->addr3, __entry->error)
); );
......
...@@ -45,6 +45,7 @@ struct io_uring_sqe { ...@@ -45,6 +45,7 @@ struct io_uring_sqe {
__u32 rename_flags; __u32 rename_flags;
__u32 unlink_flags; __u32 unlink_flags;
__u32 hardlink_flags; __u32 hardlink_flags;
__u32 xattr_flags;
}; };
__u64 user_data; /* data to be passed back at completion time */ __u64 user_data; /* data to be passed back at completion time */
/* pack this to avoid bogus arm OABI complaints */ /* pack this to avoid bogus arm OABI complaints */
...@@ -60,7 +61,8 @@ struct io_uring_sqe { ...@@ -60,7 +61,8 @@ struct io_uring_sqe {
__s32 splice_fd_in; __s32 splice_fd_in;
__u32 file_index; __u32 file_index;
}; };
__u64 __pad2[2]; __u64 addr3;
__u64 __pad2[1];
}; };
enum { enum {
...@@ -117,7 +119,7 @@ enum { ...@@ -117,7 +119,7 @@ enum {
*/ */
#define IORING_SETUP_TASKRUN_FLAG (1U << 9) #define IORING_SETUP_TASKRUN_FLAG (1U << 9)
enum { enum io_uring_op {
IORING_OP_NOP, IORING_OP_NOP,
IORING_OP_READV, IORING_OP_READV,
IORING_OP_WRITEV, IORING_OP_WRITEV,
...@@ -159,6 +161,11 @@ enum { ...@@ -159,6 +161,11 @@ enum {
IORING_OP_SYMLINKAT, IORING_OP_SYMLINKAT,
IORING_OP_LINKAT, IORING_OP_LINKAT,
IORING_OP_MSG_RING, IORING_OP_MSG_RING,
IORING_OP_FSETXATTR,
IORING_OP_SETXATTR,
IORING_OP_FGETXATTR,
IORING_OP_GETXATTR,
IORING_OP_SOCKET,
/* this goes last, obviously */ /* this goes last, obviously */
IORING_OP_LAST, IORING_OP_LAST,
......
...@@ -504,7 +504,7 @@ static int sock_map_fd(struct socket *sock, int flags) ...@@ -504,7 +504,7 @@ static int sock_map_fd(struct socket *sock, int flags)
struct socket *sock_from_file(struct file *file) struct socket *sock_from_file(struct file *file)
{ {
if (file->f_op == &socket_file_ops) if (file->f_op == &socket_file_ops)
return file->private_data; /* set in sock_map_fd */ return file->private_data; /* set in sock_alloc_file */
return NULL; return NULL;
} }
...@@ -1538,11 +1538,10 @@ int sock_create_kern(struct net *net, int family, int type, int protocol, struct ...@@ -1538,11 +1538,10 @@ int sock_create_kern(struct net *net, int family, int type, int protocol, struct
} }
EXPORT_SYMBOL(sock_create_kern); EXPORT_SYMBOL(sock_create_kern);
int __sys_socket(int family, int type, int protocol) static struct socket *__sys_socket_create(int family, int type, int protocol)
{ {
int retval;
struct socket *sock; struct socket *sock;
int flags; int retval;
/* Check the SOCK_* constants for consistency. */ /* Check the SOCK_* constants for consistency. */
BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
...@@ -1550,17 +1549,50 @@ int __sys_socket(int family, int type, int protocol) ...@@ -1550,17 +1549,50 @@ int __sys_socket(int family, int type, int protocol)
BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK); BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK); BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
flags = type & ~SOCK_TYPE_MASK; if ((type & ~SOCK_TYPE_MASK) & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) return ERR_PTR(-EINVAL);
return -EINVAL;
type &= SOCK_TYPE_MASK; type &= SOCK_TYPE_MASK;
retval = sock_create(family, type, protocol, &sock);
if (retval < 0)
return ERR_PTR(retval);
return sock;
}
/*
 * Create a socket and wrap it in a struct file without installing a file
 * descriptor.
 *
 * @family, @type and @protocol are as for socket(2); the SOCK_* flag bits
 * in @type are validated by __sys_socket_create(). SOCK_NONBLOCK is
 * translated to O_NONBLOCK on architectures where the two values differ.
 *
 * Returns the new file, or an ERR_PTR() on failure.
 */
struct file *__sys_socket_file(int family, int type, int protocol)
{
	struct socket *sock;
	int flags;

	sock = __sys_socket_create(family, type, protocol);
	if (IS_ERR(sock))
		return ERR_CAST(sock);

	flags = type & ~SOCK_TYPE_MASK;
	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;

	/*
	 * sock_alloc_file() releases @sock itself when it fails, so the
	 * result is returned as is; calling sock_release() here as well
	 * would release the socket twice.
	 */
	return sock_alloc_file(sock, flags, NULL);
}
/*
 * Create a socket and hand it to sock_map_fd().
 *
 * @family, @type and @protocol are as for socket(2); the SOCK_* flag bits
 * in @type are validated by __sys_socket_create(). SOCK_NONBLOCK is
 * translated to O_NONBLOCK on architectures where the two values differ,
 * and only O_CLOEXEC and O_NONBLOCK are passed on.
 *
 * Returns the sock_map_fd() result, or a negative errno if the socket
 * could not be created.
 */
int __sys_socket(int family, int type, int protocol)
{
	struct socket *sock = __sys_socket_create(family, type, protocol);
	int flags = type & ~SOCK_TYPE_MASK;

	if (IS_ERR(sock))
		return PTR_ERR(sock);

	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK)) {
		flags &= ~SOCK_NONBLOCK;
		flags |= O_NONBLOCK;
	}

	return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment