Commit 06a5464b, authored by Pavel Begunkov, committed by Jens Axboe

io_uring: wire send zc request type

Add a new io_uring opcode IORING_OP_SENDZC. The main distinction from
IORING_OP_SEND is that the user should specify a notification slot
index in sqe::notification_idx and the buffers are safe to reuse only
when the used notification is flushed and completes.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/a80387c6a68ce9cf99b3b6ef6f71068468761fb7.1657643355.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent bc24d6bd
......@@ -66,6 +66,10 @@ struct io_uring_sqe {
union {
__s32 splice_fd_in;
__u32 file_index;
struct {
__u16 notification_idx;
__u16 __pad;
};
};
union {
struct {
......@@ -197,6 +201,7 @@ enum io_uring_op {
IORING_OP_GETXATTR,
IORING_OP_SOCKET,
IORING_OP_URING_CMD,
IORING_OP_SENDZC_NOTIF,
/* this goes last, obviously */
IORING_OP_LAST,
......
......@@ -14,6 +14,7 @@
#include "kbuf.h"
#include "alloc_cache.h"
#include "net.h"
#include "notif.h"
#if defined(CONFIG_NET)
struct io_shutdown {
......@@ -59,6 +60,15 @@ struct io_sr_msg {
unsigned int flags;
};
/*
 * Per-request command state for the zerocopy send opcode. It is filled in by
 * io_sendzc_prep() from the SQE and read back by io_sendzc() at issue time.
 */
struct io_sendzc {
/*
 * NOTE(review): not referenced by io_sendzc_prep()/io_sendzc(), which use
 * req->file instead; presumably kept as the leading member by convention
 * for io_uring command structs -- confirm.
 */
struct file *file;
/* userspace buffer to send, taken from sqe->addr */
void __user *buf;
/* length of that buffer, taken from sqe->len */
size_t len;
/* notification slot index, taken from sqe->notification_idx */
u16 slot_idx;
/* sendmsg flags from sqe->msg_flags; prep always ORs in MSG_NOSIGNAL */
unsigned msg_flags;
/* flags from sqe->ioprio; prep rejects all but IORING_RECVSEND_POLL_FIRST */
unsigned flags;
};
#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED)
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
......@@ -834,6 +844,90 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags)
return ret;
}
/*
 * Prepare a zerocopy send request: validate the SQE and copy the fields the
 * issue handler needs into the request's struct io_sendzc.
 *
 * @req: request being prepared
 * @sqe: submission queue entry supplied by userspace
 *
 * Returns 0 on success, or -EINVAL if addr2, __pad2[0] or addr3 is non-zero
 * or if sqe->ioprio carries any bit other than IORING_RECVSEND_POLL_FIRST.
 */
int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_sendzc *zc = io_kiocb_to_cmd(req);

	/* SQE fields this opcode does not consume must be left zeroed. */
	if (READ_ONCE(sqe->addr2))
		return -EINVAL;
	if (READ_ONCE(sqe->__pad2[0]))
		return -EINVAL;
	if (READ_ONCE(sqe->addr3))
		return -EINVAL;

	/* ioprio carries the send flags; POLL_FIRST is the only one accepted. */
	zc->flags = READ_ONCE(sqe->ioprio);
	if ((zc->flags & IORING_RECVSEND_POLL_FIRST) != zc->flags)
		return -EINVAL;

	/* Snapshot the buffer description and the notification slot index. */
	zc->slot_idx = READ_ONCE(sqe->notification_idx);
	zc->len = READ_ONCE(sqe->len);
	zc->buf = u64_to_user_ptr(READ_ONCE(sqe->addr));

	/* MSG_NOSIGNAL is always added on top of the user's sendmsg flags. */
	zc->msg_flags = MSG_NOSIGNAL | READ_ONCE(sqe->msg_flags);

	/* A user-requested MSG_DONTWAIT marks the whole request as no-wait. */
	if (zc->msg_flags & MSG_DONTWAIT)
		req->flags |= REQ_F_NOWAIT;

#ifdef CONFIG_COMPAT
	if (req->ctx->compat)
		zc->msg_flags |= MSG_CMSG_COMPAT;
#endif

	return 0;
}
/*
 * Issue handler for the zerocopy send opcode.
 *
 * Resolves the notification slot selected at prep time, imports the user
 * buffer as a single-range iterator and hands it to sock_sendmsg() with
 * MSG_ZEROCOPY set and the notification's uarg attached through msg_ubuf.
 *
 * @req:         request prepared by io_sendzc_prep()
 * @issue_flags: IO_URING_F_* flags describing the issue context
 *
 * Returns IOU_OK after recording the sock_sendmsg() result with
 * io_req_set_res(). Otherwise returns -EAGAIN (poll-first not yet polled,
 * unlocked issue, or a non-blocking attempt that would block), -ENOTSOCK,
 * -EINVAL (no such notification slot), -ENOMEM (no notification available),
 * the import_single_range() error, or the sock_sendmsg() result with
 * -ERESTARTSYS mapped to -EINTR.
 */
int io_sendzc(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_sendzc *zc = io_kiocb_to_cmd(req);
struct io_notif_slot *notif_slot;
struct io_notif *notif;
struct msghdr msg;
struct iovec iov;
struct socket *sock;
unsigned msg_flags;
int ret, min_ret = 0;
/* POLL_FIRST was requested and the request has not been polled yet. */
if (!(req->flags & REQ_F_POLLED) &&
(zc->flags & IORING_RECVSEND_POLL_FIRST))
return -EAGAIN;
/*
 * Refuse to run when issued with IO_URING_F_UNLOCKED.
 * NOTE(review): presumably the notification slot lookup below relies on
 * the ring lock being held -- confirm.
 */
if (issue_flags & IO_URING_F_UNLOCKED)
return -EAGAIN;
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
/* Map the slot index from the SQE to a slot, then get its notification. */
notif_slot = io_get_notif_slot(ctx, zc->slot_idx);
if (!notif_slot)
return -EINVAL;
notif = io_get_notif(ctx, notif_slot);
if (!notif)
return -ENOMEM;
/* No destination address and no control data are passed for this send. */
msg.msg_name = NULL;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_namelen = 0;
ret = import_single_range(WRITE, zc->buf, zc->len, &iov, &msg.msg_iter);
if (unlikely(ret))
return ret;
/* Always request zerocopy; add MSG_DONTWAIT for non-blocking issue. */
msg_flags = zc->msg_flags | MSG_ZEROCOPY;
if (issue_flags & IO_URING_F_NONBLOCK)
msg_flags |= MSG_DONTWAIT;
/* With MSG_WAITALL anything short of the full buffer counts as a failure. */
if (msg_flags & MSG_WAITALL)
min_ret = iov_iter_count(&msg.msg_iter);
msg.msg_flags = msg_flags;
/* Attach the notification's uarg to this message. */
msg.msg_ubuf = &notif->uarg;
msg.sg_from_iter = NULL;
ret = sock_sendmsg(sock, &msg);
if (unlikely(ret < min_ret)) {
if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
return -EAGAIN;
/*
 * NOTE(review): with MSG_WAITALL a short send leaves ret non-negative
 * but below min_ret, and that positive byte count is returned from
 * here as-is -- confirm the caller handles a positive return as
 * intended.
 */
return ret == -ERESTARTSYS ? -EINTR : ret;
}
io_req_set_res(req, ret, 0);
return IOU_OK;
}
int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_accept *accept = io_kiocb_to_cmd(req);
......
......@@ -52,6 +52,9 @@ int io_connect_prep_async(struct io_kiocb *req);
int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_connect(struct io_kiocb *req, unsigned int issue_flags);
int io_sendzc(struct io_kiocb *req, unsigned int issue_flags);
int io_sendzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
void io_netmsg_cache_free(struct io_cache_entry *entry);
#else
static inline void io_netmsg_cache_free(struct io_cache_entry *entry)
......
......@@ -470,6 +470,21 @@ const struct io_op_def io_op_defs[] = {
.issue = io_uring_cmd,
.prep_async = io_uring_cmd_prep_async,
},
[IORING_OP_SENDZC_NOTIF] = {
.name = "SENDZC_NOTIF",
.needs_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
.audit_skip = 1,
.ioprio = 1,
#if defined(CONFIG_NET)
.prep = io_sendzc_prep,
.issue = io_sendzc,
#else
.prep = io_eopnotsupp_prep,
#endif
},
};
const char *io_uring_get_opcode(u8 opcode)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment