Commit 228e548e, authored by Anton Blanchard, committed by David S. Miller

net: Add sendmmsg socket system call

This patch adds a multiple message send syscall and is the send
version of the existing recvmmsg syscall. This is heavily
based on the patch by Arnaldo that added recvmmsg.

I wrote a microbenchmark to test the performance gains of using
this new syscall:

http://ozlabs.org/~anton/junkcode/sendmmsg_test.c

The test was run on a ppc64 box with a 10 Gbit network card. The
benchmark can send both UDP and RAW ethernet packets.

64B UDP

batch   pkts/sec
1       804570
2       872800 (+ 8 %)
4       916556 (+14 %)
8       939712 (+17 %)
16      952688 (+18 %)
32      956448 (+19 %)
64      964800 (+20 %)

64B raw socket

batch   pkts/sec
1       1201449
2       1350028 (+12 %)
4       1461416 (+22 %)
8       1513080 (+26 %)
16      1541216 (+28 %)
32      1553440 (+29 %)
64      1557888 (+30 %)

We see a 20% improvement in throughput on UDP send and 30%
on raw socket send.

[ Add sparc syscall entries. -DaveM ]
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 1c5cae81
...@@ -352,3 +352,4 @@ SYSCALL_SPU(name_to_handle_at) ...@@ -352,3 +352,4 @@ SYSCALL_SPU(name_to_handle_at)
COMPAT_SYS_SPU(open_by_handle_at) COMPAT_SYS_SPU(open_by_handle_at)
COMPAT_SYS_SPU(clock_adjtime) COMPAT_SYS_SPU(clock_adjtime)
SYSCALL_SPU(syncfs) SYSCALL_SPU(syncfs)
COMPAT_SYS_SPU(sendmmsg)
...@@ -371,10 +371,11 @@ ...@@ -371,10 +371,11 @@
#define __NR_open_by_handle_at 346 #define __NR_open_by_handle_at 346
#define __NR_clock_adjtime 347 #define __NR_clock_adjtime 347
#define __NR_syncfs 348 #define __NR_syncfs 348
#define __NR_sendmmsg 349
#ifdef __KERNEL__ #ifdef __KERNEL__
#define __NR_syscalls 349 #define __NR_syscalls 350
#define __NR__exit __NR_exit #define __NR__exit __NR_exit
#define NR_syscalls __NR_syscalls #define NR_syscalls __NR_syscalls
......
...@@ -404,8 +404,9 @@ ...@@ -404,8 +404,9 @@
#define __NR_open_by_handle_at 333 #define __NR_open_by_handle_at 333
#define __NR_clock_adjtime 334 #define __NR_clock_adjtime 334
#define __NR_syncfs 335 #define __NR_syncfs 335
#define __NR_sendmmsg 336
#define NR_syscalls 336 #define NR_syscalls 337
#ifdef __32bit_syscall_numbers__ #ifdef __32bit_syscall_numbers__
/* Sparc 32-bit only has the "setresuid32", "getresuid32" variants, /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
......
...@@ -84,4 +84,4 @@ sys_call_table: ...@@ -84,4 +84,4 @@ sys_call_table:
/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init /*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
/*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime /*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
/*335*/ .long sys_syncfs /*335*/ .long sys_syncfs, sys_sendmmsg
...@@ -85,7 +85,7 @@ sys_call_table32: ...@@ -85,7 +85,7 @@ sys_call_table32:
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
/*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime /*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime
.word sys_syncfs .word sys_syncfs, compat_sys_sendmmsg
#endif /* CONFIG_COMPAT */ #endif /* CONFIG_COMPAT */
...@@ -162,4 +162,4 @@ sys_call_table: ...@@ -162,4 +162,4 @@ sys_call_table:
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
/*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime /*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime
.word sys_syncfs .word sys_syncfs, sys_sendmmsg
...@@ -848,4 +848,5 @@ ia32_sys_call_table: ...@@ -848,4 +848,5 @@ ia32_sys_call_table:
.quad compat_sys_open_by_handle_at .quad compat_sys_open_by_handle_at
.quad compat_sys_clock_adjtime .quad compat_sys_clock_adjtime
.quad sys_syncfs .quad sys_syncfs
.quad compat_sys_sendmmsg /* 345 */
ia32_syscall_end: ia32_syscall_end:
...@@ -350,10 +350,11 @@ ...@@ -350,10 +350,11 @@
#define __NR_open_by_handle_at 342 #define __NR_open_by_handle_at 342
#define __NR_clock_adjtime 343 #define __NR_clock_adjtime 343
#define __NR_syncfs 344 #define __NR_syncfs 344
#define __NR_sendmmsg 345
#ifdef __KERNEL__ #ifdef __KERNEL__
#define NR_syscalls 345 #define NR_syscalls 346
#define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_READDIR
......
...@@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) ...@@ -677,6 +677,8 @@ __SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at)
__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime)
#define __NR_syncfs 306 #define __NR_syncfs 306
__SYSCALL(__NR_syncfs, sys_syncfs) __SYSCALL(__NR_syncfs, sys_syncfs)
#define __NR_sendmmsg 307
__SYSCALL(__NR_sendmmsg, sys_sendmmsg)
#ifndef __NO_STUBS #ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_READDIR
......
...@@ -344,3 +344,4 @@ ENTRY(sys_call_table) ...@@ -344,3 +344,4 @@ ENTRY(sys_call_table)
.long sys_open_by_handle_at .long sys_open_by_handle_at
.long sys_clock_adjtime .long sys_clock_adjtime
.long sys_syncfs .long sys_syncfs
.long sys_sendmmsg /* 345 */
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#define SYS_RECVMSG 17 /* sys_recvmsg(2) */ #define SYS_RECVMSG 17 /* sys_recvmsg(2) */
#define SYS_ACCEPT4 18 /* sys_accept4(2) */ #define SYS_ACCEPT4 18 /* sys_accept4(2) */
#define SYS_RECVMMSG 19 /* sys_recvmmsg(2) */ #define SYS_RECVMMSG 19 /* sys_recvmmsg(2) */
#define SYS_SENDMMSG 20 /* sys_sendmmsg(2) */
typedef enum { typedef enum {
SS_FREE = 0, /* not allocated */ SS_FREE = 0, /* not allocated */
......
...@@ -333,5 +333,7 @@ struct timespec; ...@@ -333,5 +333,7 @@ struct timespec;
extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
unsigned int flags, struct timespec *timeout); unsigned int flags, struct timespec *timeout);
extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
unsigned int vlen, unsigned int flags);
#endif /* not kernel and not glibc */ #endif /* not kernel and not glibc */
#endif /* _LINUX_SOCKET_H */ #endif /* _LINUX_SOCKET_H */
...@@ -610,6 +610,8 @@ asmlinkage long sys_send(int, void __user *, size_t, unsigned); ...@@ -610,6 +610,8 @@ asmlinkage long sys_send(int, void __user *, size_t, unsigned);
asmlinkage long sys_sendto(int, void __user *, size_t, unsigned, asmlinkage long sys_sendto(int, void __user *, size_t, unsigned,
struct sockaddr __user *, int); struct sockaddr __user *, int);
asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags); asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags);
asmlinkage long sys_sendmmsg(int fd, struct mmsghdr __user *msg,
unsigned int vlen, unsigned flags);
asmlinkage long sys_recv(int, void __user *, size_t, unsigned); asmlinkage long sys_recv(int, void __user *, size_t, unsigned);
asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned, asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned,
struct sockaddr __user *, int __user *); struct sockaddr __user *, int __user *);
......
...@@ -43,6 +43,8 @@ extern int compat_sock_get_timestampns(struct sock *, struct timespec __user *); ...@@ -43,6 +43,8 @@ extern int compat_sock_get_timestampns(struct sock *, struct timespec __user *);
extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *); extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *);
extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int); extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int);
extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned); extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned);
extern asmlinkage long compat_sys_sendmmsg(int, struct compat_mmsghdr __user *,
unsigned, unsigned);
extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned); extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned);
extern asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *, extern asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *,
unsigned, unsigned, unsigned, unsigned,
......
...@@ -46,7 +46,9 @@ cond_syscall(sys_getsockopt); ...@@ -46,7 +46,9 @@ cond_syscall(sys_getsockopt);
cond_syscall(compat_sys_getsockopt); cond_syscall(compat_sys_getsockopt);
cond_syscall(sys_shutdown); cond_syscall(sys_shutdown);
cond_syscall(sys_sendmsg); cond_syscall(sys_sendmsg);
cond_syscall(sys_sendmmsg);
cond_syscall(compat_sys_sendmsg); cond_syscall(compat_sys_sendmsg);
cond_syscall(compat_sys_sendmmsg);
cond_syscall(sys_recvmsg); cond_syscall(sys_recvmsg);
cond_syscall(sys_recvmmsg); cond_syscall(sys_recvmmsg);
cond_syscall(compat_sys_recvmsg); cond_syscall(compat_sys_recvmsg);
......
...@@ -722,11 +722,11 @@ EXPORT_SYMBOL(compat_mc_getsockopt); ...@@ -722,11 +722,11 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
/* Argument list sizes for compat_sys_socketcall */ /* Argument list sizes for compat_sys_socketcall */
#define AL(x) ((x) * sizeof(u32)) #define AL(x) ((x) * sizeof(u32))
static unsigned char nas[20] = { static unsigned char nas[21] = {
AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
AL(4), AL(5) AL(4), AL(5), AL(4)
}; };
#undef AL #undef AL
...@@ -735,6 +735,13 @@ asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, uns ...@@ -735,6 +735,13 @@ asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, uns
return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); return sys_sendmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
} }
/*
 * Compat entry point for sendmmsg: marks the request with MSG_CMSG_COMPAT
 * and forwards it to the common __sys_sendmmsg() implementation, which
 * interprets the vector as compat_mmsghdr entries when that flag is set.
 */
asmlinkage long compat_sys_sendmmsg(int fd, struct compat_mmsghdr __user *mmsg,
				    unsigned vlen, unsigned int flags)
{
	struct mmsghdr __user *vec = (struct mmsghdr __user *)mmsg;
	unsigned int compat_flags = flags | MSG_CMSG_COMPAT;

	return __sys_sendmmsg(fd, vec, vlen, compat_flags);
}
asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags) asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, unsigned int flags)
{ {
return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT);
...@@ -780,7 +787,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) ...@@ -780,7 +787,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
u32 a[6]; u32 a[6];
u32 a0, a1; u32 a0, a1;
if (call < SYS_SOCKET || call > SYS_RECVMMSG) if (call < SYS_SOCKET || call > SYS_SENDMMSG)
return -EINVAL; return -EINVAL;
if (copy_from_user(a, args, nas[call])) if (copy_from_user(a, args, nas[call]))
return -EFAULT; return -EFAULT;
...@@ -839,6 +846,9 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) ...@@ -839,6 +846,9 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
case SYS_SENDMSG: case SYS_SENDMSG:
ret = compat_sys_sendmsg(a0, compat_ptr(a1), a[2]); ret = compat_sys_sendmsg(a0, compat_ptr(a1), a[2]);
break; break;
case SYS_SENDMMSG:
ret = compat_sys_sendmmsg(a0, compat_ptr(a1), a[2], a[3]);
break;
case SYS_RECVMSG: case SYS_RECVMSG:
ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
break; break;
......
...@@ -551,11 +551,10 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) ...@@ -551,11 +551,10 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
} }
EXPORT_SYMBOL(sock_tx_timestamp); EXPORT_SYMBOL(sock_tx_timestamp);
static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size) struct msghdr *msg, size_t size)
{ {
struct sock_iocb *si = kiocb_to_siocb(iocb); struct sock_iocb *si = kiocb_to_siocb(iocb);
int err;
sock_update_classid(sock->sk); sock_update_classid(sock->sk);
...@@ -564,13 +563,17 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, ...@@ -564,13 +563,17 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
si->msg = msg; si->msg = msg;
si->size = size; si->size = size;
err = security_socket_sendmsg(sock, msg, size);
if (err)
return err;
return sock->ops->sendmsg(iocb, sock, msg, size); return sock->ops->sendmsg(iocb, sock, msg, size);
} }
/*
 * Send @msg on @sock after consulting the security hook: a non-zero result
 * from security_socket_sendmsg() is returned as-is, otherwise the message is
 * handed to __sock_sendmsg_nosec().
 */
static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
				 struct msghdr *msg, size_t size)
{
	int denied = security_socket_sendmsg(sock, msg, size);

	if (denied)
		return denied;

	return __sock_sendmsg_nosec(iocb, sock, msg, size);
}
int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{ {
struct kiocb iocb; struct kiocb iocb;
...@@ -586,6 +589,20 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) ...@@ -586,6 +589,20 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
} }
EXPORT_SYMBOL(sock_sendmsg); EXPORT_SYMBOL(sock_sendmsg);
/*
 * Synchronous send that bypasses the security hook: builds an on-stack
 * sync kiocb, issues the send through __sock_sendmsg_nosec() and, if the
 * request was queued (-EIOCBQUEUED), waits for its completion status.
 */
int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
{
	struct kiocb iocb;
	struct sock_iocb siocb;
	int status;

	init_sync_kiocb(&iocb, NULL);
	iocb.private = &siocb;

	status = __sock_sendmsg_nosec(&iocb, sock, msg, size);
	if (status != -EIOCBQUEUED)
		return status;

	return wait_on_sync_kiocb(&iocb);
}
int kernel_sendmsg(struct socket *sock, struct msghdr *msg, int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size) struct kvec *vec, size_t num, size_t size)
{ {
...@@ -1863,57 +1880,47 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how) ...@@ -1863,57 +1880,47 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how)
#define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
/* static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
* BSD sendmsg interface struct msghdr *msg_sys, unsigned flags, int nosec)
*/
SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
{ {
struct compat_msghdr __user *msg_compat = struct compat_msghdr __user *msg_compat =
(struct compat_msghdr __user *)msg; (struct compat_msghdr __user *)msg;
struct socket *sock;
struct sockaddr_storage address; struct sockaddr_storage address;
struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
unsigned char ctl[sizeof(struct cmsghdr) + 20] unsigned char ctl[sizeof(struct cmsghdr) + 20]
__attribute__ ((aligned(sizeof(__kernel_size_t)))); __attribute__ ((aligned(sizeof(__kernel_size_t))));
/* 20 is size of ipv6_pktinfo */ /* 20 is size of ipv6_pktinfo */
unsigned char *ctl_buf = ctl; unsigned char *ctl_buf = ctl;
struct msghdr msg_sys;
int err, ctl_len, iov_size, total_len; int err, ctl_len, iov_size, total_len;
int fput_needed;
err = -EFAULT; err = -EFAULT;
if (MSG_CMSG_COMPAT & flags) { if (MSG_CMSG_COMPAT & flags) {
if (get_compat_msghdr(&msg_sys, msg_compat)) if (get_compat_msghdr(msg_sys, msg_compat))
return -EFAULT; return -EFAULT;
} else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
return -EFAULT; return -EFAULT;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
/* do not move before msg_sys is valid */ /* do not move before msg_sys is valid */
err = -EMSGSIZE; err = -EMSGSIZE;
if (msg_sys.msg_iovlen > UIO_MAXIOV) if (msg_sys->msg_iovlen > UIO_MAXIOV)
goto out_put; goto out;
/* Check whether to allocate the iovec area */ /* Check whether to allocate the iovec area */
err = -ENOMEM; err = -ENOMEM;
iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
if (msg_sys.msg_iovlen > UIO_FASTIOV) { if (msg_sys->msg_iovlen > UIO_FASTIOV) {
iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
if (!iov) if (!iov)
goto out_put; goto out;
} }
/* This will also move the address data into kernel space */ /* This will also move the address data into kernel space */
if (MSG_CMSG_COMPAT & flags) { if (MSG_CMSG_COMPAT & flags) {
err = verify_compat_iovec(&msg_sys, iov, err = verify_compat_iovec(msg_sys, iov,
(struct sockaddr *)&address, (struct sockaddr *)&address,
VERIFY_READ); VERIFY_READ);
} else } else
err = verify_iovec(&msg_sys, iov, err = verify_iovec(msg_sys, iov,
(struct sockaddr *)&address, (struct sockaddr *)&address,
VERIFY_READ); VERIFY_READ);
if (err < 0) if (err < 0)
...@@ -1922,17 +1929,17 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) ...@@ -1922,17 +1929,17 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
err = -ENOBUFS; err = -ENOBUFS;
if (msg_sys.msg_controllen > INT_MAX) if (msg_sys->msg_controllen > INT_MAX)
goto out_freeiov; goto out_freeiov;
ctl_len = msg_sys.msg_controllen; ctl_len = msg_sys->msg_controllen;
if ((MSG_CMSG_COMPAT & flags) && ctl_len) { if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
err = err =
cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl,
sizeof(ctl)); sizeof(ctl));
if (err) if (err)
goto out_freeiov; goto out_freeiov;
ctl_buf = msg_sys.msg_control; ctl_buf = msg_sys->msg_control;
ctl_len = msg_sys.msg_controllen; ctl_len = msg_sys->msg_controllen;
} else if (ctl_len) { } else if (ctl_len) {
if (ctl_len > sizeof(ctl)) { if (ctl_len > sizeof(ctl)) {
ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
...@@ -1941,21 +1948,22 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) ...@@ -1941,21 +1948,22 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
} }
err = -EFAULT; err = -EFAULT;
/* /*
* Careful! Before this, msg_sys.msg_control contains a user pointer. * Careful! Before this, msg_sys->msg_control contains a user pointer.
* Afterwards, it will be a kernel pointer. Thus the compiler-assisted * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
* checking falls down on this. * checking falls down on this.
*/ */
if (copy_from_user(ctl_buf, if (copy_from_user(ctl_buf,
(void __user __force *)msg_sys.msg_control, (void __user __force *)msg_sys->msg_control,
ctl_len)) ctl_len))
goto out_freectl; goto out_freectl;
msg_sys.msg_control = ctl_buf; msg_sys->msg_control = ctl_buf;
} }
msg_sys.msg_flags = flags; msg_sys->msg_flags = flags;
if (sock->file->f_flags & O_NONBLOCK) if (sock->file->f_flags & O_NONBLOCK)
msg_sys.msg_flags |= MSG_DONTWAIT; msg_sys->msg_flags |= MSG_DONTWAIT;
err = sock_sendmsg(sock, &msg_sys, total_len); err = (nosec ? sock_sendmsg_nosec : sock_sendmsg)(sock, msg_sys,
total_len);
out_freectl: out_freectl:
if (ctl_buf != ctl) if (ctl_buf != ctl)
...@@ -1963,12 +1971,114 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) ...@@ -1963,12 +1971,114 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
out_freeiov: out_freeiov:
if (iov != iovstack) if (iov != iovstack)
sock_kfree_s(sock->sk, iov, iov_size); sock_kfree_s(sock->sk, iov, iov_size);
out_put: out:
return err;
}
/*
* BSD sendmsg interface
*/
SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
{
int fput_needed, err;
struct msghdr msg_sys;
struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
err = __sys_sendmsg(sock, msg, &msg_sys, flags, 0);
fput_light(sock->file, fput_needed); fput_light(sock->file, fput_needed);
out: out:
return err; return err;
} }
/*
 *	Linux sendmmsg interface
 *
 *	Sends up to @vlen messages from the user-space vector @mmsg on the
 *	socket referred to by @fd.  After each successful send the number of
 *	bytes sent is stored in that entry's msg_len.  When MSG_CMSG_COMPAT is
 *	set in @flags the vector is walked as struct compat_mmsghdr entries.
 *
 *	Returns the number of messages sent if at least one went out (or if
 *	nothing failed), otherwise the negative error from the lookup, the
 *	pending socket error, or the first send.
 */
int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
		   unsigned int flags)
{
	int fput_needed, err, datagrams;
	struct socket *sock;
	struct mmsghdr __user *entry;
	struct compat_mmsghdr __user *compat_entry;
	struct msghdr msg_sys;

	/*
	 * Bound the work done in one call; this also keeps the signed
	 * datagrams counter safely comparable with the unsigned vlen.
	 */
	if (vlen > UIO_MAXIOV)
		vlen = UIO_MAXIOV;

	datagrams = 0;

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		return err;

	err = sock_error(sock->sk);
	if (err)
		goto out_put;

	entry = mmsg;
	compat_entry = (struct compat_mmsghdr __user *)mmsg;

	while (datagrams < vlen) {
		/*
		 * No need to ask LSM for more than the first datagram:
		 * datagrams is passed as the nosec argument, so it is zero
		 * (check enabled) only for the first message.
		 *
		 * NOTE(review): this presumably assumes later messages go to
		 * the same destination as the first one - confirm that a
		 * per-message msg_name cannot bypass the LSM hook.
		 */
		if (MSG_CMSG_COMPAT & flags) {
			err = __sys_sendmsg(sock, (struct msghdr __user *)compat_entry,
					    &msg_sys, flags, datagrams);
			if (err < 0)
				break;
			/* on success err holds the byte count for this message */
			err = __put_user(err, &compat_entry->msg_len);
			++compat_entry;
		} else {
			err = __sys_sendmsg(sock, (struct msghdr __user *)entry,
					    &msg_sys, flags, datagrams);
			if (err < 0)
				break;
			/* on success err holds the byte count for this message */
			err = put_user(err, &entry->msg_len);
			++entry;
		}

		if (err)
			break;
		++datagrams;
	}

	/*
	 * We may send less entries than requested (vlen) if the sock is
	 * non blocking, or if sendmsg returns an error after we send some
	 * datagrams.  In the latter case record the error so it is returned
	 * on the next call or when the app asks about it using
	 * getsockopt(SO_ERROR).
	 *
	 * This must happen before fput_light(): once the file reference is
	 * dropped the socket may no longer be valid to dereference.
	 */
	if (err != 0 && datagrams != 0 && err != -EAGAIN)
		sock->sk->sk_err = -err;

out_put:
	fput_light(sock->file, fput_needed);

	if (err == 0)
		return datagrams;

	/* a partial send reports the count; the error was recorded above */
	if (datagrams != 0)
		return datagrams;

	return err;
}
/*
 * sendmmsg system call entry point: passes its four arguments unchanged to
 * __sys_sendmmsg() and returns that function's result.
 */
SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags)
{
	int sent = __sys_sendmmsg(fd, mmsg, vlen, flags);

	return sent;
}
static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
struct msghdr *msg_sys, unsigned flags, int nosec) struct msghdr *msg_sys, unsigned flags, int nosec)
{ {
...@@ -2214,11 +2324,11 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, ...@@ -2214,11 +2324,11 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
#ifdef __ARCH_WANT_SYS_SOCKETCALL #ifdef __ARCH_WANT_SYS_SOCKETCALL
/* Argument list sizes for sys_socketcall */ /* Argument list sizes for sys_socketcall */
#define AL(x) ((x) * sizeof(unsigned long)) #define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[20] = { static const unsigned char nargs[21] = {
AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
AL(4), AL(5) AL(4), AL(5), AL(4)
}; };
#undef AL #undef AL
...@@ -2238,7 +2348,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) ...@@ -2238,7 +2348,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
int err; int err;
unsigned int len; unsigned int len;
if (call < 1 || call > SYS_RECVMMSG) if (call < 1 || call > SYS_SENDMMSG)
return -EINVAL; return -EINVAL;
len = nargs[call]; len = nargs[call];
...@@ -2313,6 +2423,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) ...@@ -2313,6 +2423,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
case SYS_SENDMSG: case SYS_SENDMSG:
err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
break; break;
case SYS_SENDMMSG:
err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]);
break;
case SYS_RECVMSG: case SYS_RECVMSG:
err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
break; break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment