Commit a2e27255, authored by Arnaldo Carvalho de Melo, committed by David S. Miller

net: Introduce recvmmsg socket syscall

Meaning receive multiple messages, reducing the number of syscalls and
net stack entry/exit operations.

Next patches will introduce mechanisms where protocols that want to
optimize this operation will provide an unlocked_recvmsg operation.

This takes into account comments made by:

. Paul Moore: sock_recvmsg is called only for the first datagram,
  sock_recvmsg_nosec is used for the rest.

. Caitlin Bestler: recvmmsg now has a struct timespec timeout, that
  works in the same fashion as the ppoll one.

  If the underlying protocol returns a datagram with MSG_OOB set, this
  will make recvmmsg return right away with as many datagrams (+ the OOB
  one) as it has received so far.

. Rémi Denis-Courmont & Steven Whitehouse: If we receive N < vlen
  datagrams and then recvmsg returns an error, recvmmsg will return
  the successfully received datagrams, store the error and return it
  in the next call.

This paves the way for a subsequent optimization, sk_prot->unlocked_recvmsg,
where we will be able to acquire the lock only at batch start and end, not at
every underlying recvmsg call.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c05e85a0
...@@ -497,6 +497,7 @@ sys_call_table: ...@@ -497,6 +497,7 @@ sys_call_table:
.quad sys_signalfd .quad sys_signalfd
.quad sys_ni_syscall .quad sys_ni_syscall
.quad sys_eventfd .quad sys_eventfd
.quad sys_recvmmsg
.size sys_call_table, . - sys_call_table .size sys_call_table, . - sys_call_table
.type sys_call_table, @object .type sys_call_table, @object
......
...@@ -374,6 +374,7 @@ ...@@ -374,6 +374,7 @@
CALL(sys_pwritev) CALL(sys_pwritev)
CALL(sys_rt_tgsigqueueinfo) CALL(sys_rt_tgsigqueueinfo)
CALL(sys_perf_event_open) CALL(sys_perf_event_open)
/* 365 */ CALL(sys_recvmmsg)
#ifndef syscalls_counted #ifndef syscalls_counted
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
#define syscalls_counted #define syscalls_counted
......
...@@ -295,4 +295,5 @@ sys_call_table: ...@@ -295,4 +295,5 @@ sys_call_table:
.long sys_signalfd .long sys_signalfd
.long sys_ni_syscall /* 280, was sys_timerfd */ .long sys_ni_syscall /* 280, was sys_timerfd */
.long sys_eventfd .long sys_eventfd
.long sys_recvmmsg
.long sys_ni_syscall /* r8 is saturated at nr_syscalls */ .long sys_ni_syscall /* r8 is saturated at nr_syscalls */
...@@ -1621,6 +1621,7 @@ ENTRY(_sys_call_table) ...@@ -1621,6 +1621,7 @@ ENTRY(_sys_call_table)
.long _sys_pwritev .long _sys_pwritev
.long _sys_rt_tgsigqueueinfo .long _sys_rt_tgsigqueueinfo
.long _sys_perf_event_open .long _sys_perf_event_open
.long _sys_recvmmsg /* 370 */
.rept NR_syscalls-(.-_sys_call_table)/4 .rept NR_syscalls-(.-_sys_call_table)/4
.long _sys_ni_syscall .long _sys_ni_syscall
......
...@@ -1806,6 +1806,7 @@ sys_call_table: ...@@ -1806,6 +1806,7 @@ sys_call_table:
data8 sys_preadv data8 sys_preadv
data8 sys_pwritev // 1320 data8 sys_pwritev // 1320
data8 sys_rt_tgsigqueueinfo data8 sys_rt_tgsigqueueinfo
data8 sys_recvmmsg
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
...@@ -371,3 +371,4 @@ ENTRY(sys_call_table) ...@@ -371,3 +371,4 @@ ENTRY(sys_call_table)
.long sys_ni_syscall .long sys_ni_syscall
.long sys_rt_tgsigqueueinfo /* 365 */ .long sys_rt_tgsigqueueinfo /* 365 */
.long sys_perf_event_open .long sys_perf_event_open
.long sys_recvmmsg
...@@ -583,6 +583,7 @@ einval: li v0, -ENOSYS ...@@ -583,6 +583,7 @@ einval: li v0, -ENOSYS
sys sys_rt_tgsigqueueinfo 4 sys sys_rt_tgsigqueueinfo 4
sys sys_perf_event_open 5 sys sys_perf_event_open 5
sys sys_accept4 4 sys sys_accept4 4
sys sys_recvmmsg 5
.endm .endm
/* We pre-compute the number of _instruction_ bytes needed to /* We pre-compute the number of _instruction_ bytes needed to
......
...@@ -420,4 +420,5 @@ sys_call_table: ...@@ -420,4 +420,5 @@ sys_call_table:
PTR sys_rt_tgsigqueueinfo PTR sys_rt_tgsigqueueinfo
PTR sys_perf_event_open PTR sys_perf_event_open
PTR sys_accept4 PTR sys_accept4
PTR sys_recvmmsg
.size sys_call_table,.-sys_call_table .size sys_call_table,.-sys_call_table
...@@ -418,4 +418,5 @@ EXPORT(sysn32_call_table) ...@@ -418,4 +418,5 @@ EXPORT(sysn32_call_table)
PTR compat_sys_rt_tgsigqueueinfo /* 5295 */ PTR compat_sys_rt_tgsigqueueinfo /* 5295 */
PTR sys_perf_event_open PTR sys_perf_event_open
PTR sys_accept4 PTR sys_accept4
PTR compat_sys_recvmmsg
.size sysn32_call_table,.-sysn32_call_table .size sysn32_call_table,.-sysn32_call_table
...@@ -538,4 +538,5 @@ sys_call_table: ...@@ -538,4 +538,5 @@ sys_call_table:
PTR compat_sys_rt_tgsigqueueinfo PTR compat_sys_rt_tgsigqueueinfo
PTR sys_perf_event_open PTR sys_perf_event_open
PTR sys_accept4 PTR sys_accept4
PTR compat_sys_recvmmsg
.size sys_call_table,.-sys_call_table .size sys_call_table,.-sys_call_table
...@@ -391,3 +391,4 @@ sys_call_table: ...@@ -391,3 +391,4 @@ sys_call_table:
.long sys_pwritev .long sys_pwritev
.long sys_rt_tgsigqueueinfo .long sys_rt_tgsigqueueinfo
.long sys_perf_event_open .long sys_perf_event_open
.long sys_recvmmsg /* 365 */
...@@ -82,5 +82,5 @@ sys_call_table: ...@@ -82,5 +82,5 @@ sys_call_table:
/*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate /*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
/*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open /*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg
...@@ -83,7 +83,7 @@ sys_call_table32: ...@@ -83,7 +83,7 @@ sys_call_table32:
/*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate
.word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1 .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
.word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg
#endif /* CONFIG_COMPAT */ #endif /* CONFIG_COMPAT */
...@@ -158,4 +158,4 @@ sys_call_table: ...@@ -158,4 +158,4 @@ sys_call_table:
/*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
.word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
.word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg
...@@ -832,4 +832,5 @@ ia32_sys_call_table: ...@@ -832,4 +832,5 @@ ia32_sys_call_table:
.quad compat_sys_pwritev .quad compat_sys_pwritev
.quad compat_sys_rt_tgsigqueueinfo /* 335 */ .quad compat_sys_rt_tgsigqueueinfo /* 335 */
.quad sys_perf_event_open .quad sys_perf_event_open
.quad compat_sys_recvmmsg
ia32_syscall_end: ia32_syscall_end:
...@@ -342,10 +342,11 @@ ...@@ -342,10 +342,11 @@
#define __NR_pwritev 334 #define __NR_pwritev 334
#define __NR_rt_tgsigqueueinfo 335 #define __NR_rt_tgsigqueueinfo 335
#define __NR_perf_event_open 336 #define __NR_perf_event_open 336
#define __NR_recvmmsg 337
#ifdef __KERNEL__ #ifdef __KERNEL__
#define NR_syscalls 337 #define NR_syscalls 338
#define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_READDIR
......
...@@ -661,6 +661,8 @@ __SYSCALL(__NR_pwritev, sys_pwritev) ...@@ -661,6 +661,8 @@ __SYSCALL(__NR_pwritev, sys_pwritev)
__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
#define __NR_perf_event_open 298 #define __NR_perf_event_open 298
__SYSCALL(__NR_perf_event_open, sys_perf_event_open) __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
#define __NR_recvmmsg 299
__SYSCALL(__NR_recvmmsg, sys_recvmmsg)
#ifndef __NO_STUBS #ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_READDIR
......
...@@ -336,3 +336,4 @@ ENTRY(sys_call_table) ...@@ -336,3 +336,4 @@ ENTRY(sys_call_table)
.long sys_pwritev .long sys_pwritev
.long sys_rt_tgsigqueueinfo /* 335 */ .long sys_rt_tgsigqueueinfo /* 335 */
.long sys_perf_event_open .long sys_perf_event_open
.long sys_recvmmsg
...@@ -681,8 +681,10 @@ __SYSCALL(304, sys_signalfd, 3) ...@@ -681,8 +681,10 @@ __SYSCALL(304, sys_signalfd, 3)
__SYSCALL(305, sys_ni_syscall, 0) __SYSCALL(305, sys_ni_syscall, 0)
#define __NR_eventfd 306 #define __NR_eventfd 306
__SYSCALL(306, sys_eventfd, 1) __SYSCALL(306, sys_eventfd, 1)
#define __NR_recvmmsg 307
__SYSCALL(307, sys_recvmmsg, 5)
#define __NR_syscall_count 307 #define __NR_syscall_count 308
/* /*
* sysxtensa syscall handler * sysxtensa syscall handler
......
...@@ -41,6 +41,7 @@ ...@@ -41,6 +41,7 @@
#define SYS_SENDMSG 16 /* sys_sendmsg(2) */ #define SYS_SENDMSG 16 /* sys_sendmsg(2) */
#define SYS_RECVMSG 17 /* sys_recvmsg(2) */ #define SYS_RECVMSG 17 /* sys_recvmsg(2) */
#define SYS_ACCEPT4 18 /* sys_accept4(2) */ #define SYS_ACCEPT4 18 /* sys_accept4(2) */
#define SYS_RECVMMSG 19 /* sys_recvmmsg(2) */
typedef enum { typedef enum {
SS_FREE = 0, /* not allocated */ SS_FREE = 0, /* not allocated */
......
...@@ -65,6 +65,12 @@ struct msghdr { ...@@ -65,6 +65,12 @@ struct msghdr {
unsigned msg_flags; unsigned msg_flags;
}; };
/* For recvmmsg/sendmmsg */
/*
 * One element of the message vector handed to recvmmsg: a regular message
 * header plus a per-message length slot.
 */
struct mmsghdr {
/* Header for this message; __sys_recvmmsg passes it to the recvmsg path. */
struct msghdr msg_hdr;
/* Written by __sys_recvmmsg with the non-negative per-message receive result. */
unsigned msg_len;
};
/* /*
* POSIX 1003.1g - ancillary data object information * POSIX 1003.1g - ancillary data object information
* Ancillary data consits of a sequence of pairs of * Ancillary data consits of a sequence of pairs of
...@@ -312,6 +318,10 @@ extern int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uadd ...@@ -312,6 +318,10 @@ extern int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uadd
extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr); extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr);
extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
struct timespec;
extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
unsigned int flags, struct timespec *timeout);
#endif #endif
#endif /* not kernel and not glibc */ #endif /* not kernel and not glibc */
#endif /* _LINUX_SOCKET_H */ #endif /* _LINUX_SOCKET_H */
...@@ -25,6 +25,7 @@ struct linux_dirent64; ...@@ -25,6 +25,7 @@ struct linux_dirent64;
struct list_head; struct list_head;
struct msgbuf; struct msgbuf;
struct msghdr; struct msghdr;
struct mmsghdr;
struct msqid_ds; struct msqid_ds;
struct new_utsname; struct new_utsname;
struct nfsctl_arg; struct nfsctl_arg;
...@@ -677,6 +678,9 @@ asmlinkage long sys_recv(int, void __user *, size_t, unsigned); ...@@ -677,6 +678,9 @@ asmlinkage long sys_recv(int, void __user *, size_t, unsigned);
asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned, asmlinkage long sys_recvfrom(int, void __user *, size_t, unsigned,
struct sockaddr __user *, int __user *); struct sockaddr __user *, int __user *);
asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags); asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags);
asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg,
unsigned int vlen, unsigned flags,
struct timespec __user *timeout);
asmlinkage long sys_socket(int, int, int); asmlinkage long sys_socket(int, int, int);
asmlinkage long sys_socketpair(int, int, int, int __user *); asmlinkage long sys_socketpair(int, int, int, int __user *);
asmlinkage long sys_socketcall(int call, unsigned long __user *args); asmlinkage long sys_socketcall(int call, unsigned long __user *args);
......
...@@ -18,6 +18,11 @@ struct compat_msghdr { ...@@ -18,6 +18,11 @@ struct compat_msghdr {
compat_uint_t msg_flags; compat_uint_t msg_flags;
}; };
/*
 * Compat counterpart of struct mmsghdr, built from the compat message header
 * and a compat-sized length field; this is the element type that
 * compat_sys_recvmmsg() takes from user space.
 */
struct compat_mmsghdr {
struct compat_msghdr msg_hdr;
compat_uint_t msg_len;
};
struct compat_cmsghdr { struct compat_cmsghdr {
compat_size_t cmsg_len; compat_size_t cmsg_len;
compat_int_t cmsg_level; compat_int_t cmsg_level;
...@@ -35,6 +40,9 @@ extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *); ...@@ -35,6 +40,9 @@ extern int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *);
extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int); extern int verify_compat_iovec(struct msghdr *, struct iovec *, struct sockaddr *, int);
extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned); extern asmlinkage long compat_sys_sendmsg(int,struct compat_msghdr __user *,unsigned);
extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned); extern asmlinkage long compat_sys_recvmsg(int,struct compat_msghdr __user *,unsigned);
extern asmlinkage long compat_sys_recvmmsg(int, struct compat_mmsghdr __user *,
unsigned, unsigned,
struct timespec __user *);
extern asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, int __user *); extern asmlinkage long compat_sys_getsockopt(int, int, int, char __user *, int __user *);
extern int put_cmsg_compat(struct msghdr*, int, int, int, void *); extern int put_cmsg_compat(struct msghdr*, int, int, int, void *);
......
...@@ -48,8 +48,10 @@ cond_syscall(sys_shutdown); ...@@ -48,8 +48,10 @@ cond_syscall(sys_shutdown);
cond_syscall(sys_sendmsg); cond_syscall(sys_sendmsg);
cond_syscall(compat_sys_sendmsg); cond_syscall(compat_sys_sendmsg);
cond_syscall(sys_recvmsg); cond_syscall(sys_recvmsg);
cond_syscall(sys_recvmmsg);
cond_syscall(compat_sys_recvmsg); cond_syscall(compat_sys_recvmsg);
cond_syscall(compat_sys_recvfrom); cond_syscall(compat_sys_recvfrom);
cond_syscall(compat_sys_recvmmsg);
cond_syscall(sys_socketcall); cond_syscall(sys_socketcall);
cond_syscall(sys_futex); cond_syscall(sys_futex);
cond_syscall(compat_sys_futex); cond_syscall(compat_sys_futex);
......
...@@ -727,10 +727,10 @@ EXPORT_SYMBOL(compat_mc_getsockopt); ...@@ -727,10 +727,10 @@ EXPORT_SYMBOL(compat_mc_getsockopt);
/* Argument list sizes for compat_sys_socketcall */ /* Argument list sizes for compat_sys_socketcall */
#define AL(x) ((x) * sizeof(u32)) #define AL(x) ((x) * sizeof(u32))
static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
AL(4)}; AL(4),AL(5)};
#undef AL #undef AL
asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
...@@ -755,13 +755,36 @@ asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len, ...@@ -755,13 +755,36 @@ asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len,
return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen); return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen);
} }
/*
 * Compat entry point for recvmmsg.
 *
 * @fd:      socket file descriptor
 * @mmsg:    user vector of compat message headers to fill in
 * @vlen:    number of entries in @mmsg
 * @flags:   receive flags; MSG_CMSG_COMPAT is added before calling the
 *           common implementation
 * @timeout: user pointer to a compat timespec, or NULL for no timeout
 *
 * Returns what __sys_recvmmsg() returns, or -EFAULT if the timeout cannot be
 * read from, or written back to, user space.
 */
asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
				    unsigned vlen, unsigned int flags,
				    struct timespec __user *timeout)
{
	int datagrams;
	struct timespec ktspec;
	struct compat_timespec __user *utspec =
			(struct compat_timespec __user *)timeout;

	/*
	 * A NULL timeout means "no timeout", exactly as in the native
	 * syscall; do not try to read through it.
	 */
	if (timeout == NULL)
		return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
				      flags | MSG_CMSG_COMPAT, NULL);

	/* Widen the compat timespec into a kernel one, field by field. */
	if (get_user(ktspec.tv_sec, &utspec->tv_sec) ||
	    get_user(ktspec.tv_nsec, &utspec->tv_nsec))
		return -EFAULT;

	datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
				   flags | MSG_CMSG_COMPAT, &ktspec);

	/* Report the remaining time only when something was received. */
	if (datagrams > 0 &&
	    (put_user(ktspec.tv_sec, &utspec->tv_sec) ||
	     put_user(ktspec.tv_nsec, &utspec->tv_nsec)))
		datagrams = -EFAULT;

	return datagrams;
}
asmlinkage long compat_sys_socketcall(int call, u32 __user *args) asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
{ {
int ret; int ret;
u32 a[6]; u32 a[6];
u32 a0, a1; u32 a0, a1;
if (call < SYS_SOCKET || call > SYS_ACCEPT4) if (call < SYS_SOCKET || call > SYS_RECVMMSG)
return -EINVAL; return -EINVAL;
if (copy_from_user(a, args, nas[call])) if (copy_from_user(a, args, nas[call]))
return -EFAULT; return -EFAULT;
...@@ -823,6 +846,10 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) ...@@ -823,6 +846,10 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
case SYS_RECVMSG: case SYS_RECVMSG:
ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
break; break;
case SYS_RECVMMSG:
ret = compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3],
compat_ptr(a[4]));
break;
case SYS_ACCEPT4: case SYS_ACCEPT4:
ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
break; break;
......
...@@ -683,10 +683,9 @@ void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, ...@@ -683,10 +683,9 @@ void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
} }
EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops); EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops);
static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size, int flags) struct msghdr *msg, size_t size, int flags)
{ {
int err;
struct sock_iocb *si = kiocb_to_siocb(iocb); struct sock_iocb *si = kiocb_to_siocb(iocb);
si->sock = sock; si->sock = sock;
...@@ -695,13 +694,17 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, ...@@ -695,13 +694,17 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
si->size = size; si->size = size;
si->flags = flags; si->flags = flags;
err = security_socket_recvmsg(sock, msg, size, flags);
if (err)
return err;
return sock->ops->recvmsg(iocb, sock, msg, size, flags); return sock->ops->recvmsg(iocb, sock, msg, size, flags);
} }
/*
 * Receive path with the security hook: consult security_socket_recvmsg()
 * first and return its result unchanged when it is non-zero; otherwise
 * continue into __sock_recvmsg_nosec().
 */
static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
				 struct msghdr *msg, size_t size, int flags)
{
	int denied = security_socket_recvmsg(sock, msg, size, flags);

	if (denied)
		return denied;

	return __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
}
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t size, int flags) size_t size, int flags)
{ {
...@@ -717,6 +720,21 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, ...@@ -717,6 +720,21 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg,
return ret; return ret;
} }
/*
 * Synchronous receive that bypasses the security hook: set up an on-stack
 * kiocb/sock_iocb pair, call __sock_recvmsg_nosec(), and wait for completion
 * if the request was queued (-EIOCBQUEUED).
 */
static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct sock_iocb sock_ctx;
	struct kiocb sync_iocb;
	int status;

	init_sync_kiocb(&sync_iocb, NULL);
	sync_iocb.private = &sock_ctx;

	status = __sock_recvmsg_nosec(&sync_iocb, sock, msg, size, flags);
	if (status != -EIOCBQUEUED)
		return status;

	return wait_on_sync_kiocb(&sync_iocb);
}
int kernel_recvmsg(struct socket *sock, struct msghdr *msg, int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size, int flags) struct kvec *vec, size_t num, size_t size, int flags)
{ {
...@@ -1983,22 +2001,15 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) ...@@ -1983,22 +2001,15 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
return err; return err;
} }
/* static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
* BSD recvmsg interface struct msghdr *msg_sys, unsigned flags, int nosec)
*/
SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
unsigned int, flags)
{ {
struct compat_msghdr __user *msg_compat = struct compat_msghdr __user *msg_compat =
(struct compat_msghdr __user *)msg; (struct compat_msghdr __user *)msg;
struct socket *sock;
struct iovec iovstack[UIO_FASTIOV]; struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack; struct iovec *iov = iovstack;
struct msghdr msg_sys;
unsigned long cmsg_ptr; unsigned long cmsg_ptr;
int err, iov_size, total_len, len; int err, iov_size, total_len, len;
int fput_needed;
/* kernel mode address */ /* kernel mode address */
struct sockaddr_storage addr; struct sockaddr_storage addr;
...@@ -2008,27 +2019,23 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, ...@@ -2008,27 +2019,23 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
int __user *uaddr_len; int __user *uaddr_len;
if (MSG_CMSG_COMPAT & flags) { if (MSG_CMSG_COMPAT & flags) {
if (get_compat_msghdr(&msg_sys, msg_compat)) if (get_compat_msghdr(msg_sys, msg_compat))
return -EFAULT; return -EFAULT;
} }
else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
return -EFAULT; return -EFAULT;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
err = -EMSGSIZE; err = -EMSGSIZE;
if (msg_sys.msg_iovlen > UIO_MAXIOV) if (msg_sys->msg_iovlen > UIO_MAXIOV)
goto out_put; goto out;
/* Check whether to allocate the iovec area */ /* Check whether to allocate the iovec area */
err = -ENOMEM; err = -ENOMEM;
iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
if (msg_sys.msg_iovlen > UIO_FASTIOV) { if (msg_sys->msg_iovlen > UIO_FASTIOV) {
iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
if (!iov) if (!iov)
goto out_put; goto out;
} }
/* /*
...@@ -2036,46 +2043,47 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, ...@@ -2036,46 +2043,47 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
* kernel msghdr to use the kernel address space) * kernel msghdr to use the kernel address space)
*/ */
uaddr = (__force void __user *)msg_sys.msg_name; uaddr = (__force void __user *)msg_sys->msg_name;
uaddr_len = COMPAT_NAMELEN(msg); uaddr_len = COMPAT_NAMELEN(msg);
if (MSG_CMSG_COMPAT & flags) { if (MSG_CMSG_COMPAT & flags) {
err = verify_compat_iovec(&msg_sys, iov, err = verify_compat_iovec(msg_sys, iov,
(struct sockaddr *)&addr, (struct sockaddr *)&addr,
VERIFY_WRITE); VERIFY_WRITE);
} else } else
err = verify_iovec(&msg_sys, iov, err = verify_iovec(msg_sys, iov,
(struct sockaddr *)&addr, (struct sockaddr *)&addr,
VERIFY_WRITE); VERIFY_WRITE);
if (err < 0) if (err < 0)
goto out_freeiov; goto out_freeiov;
total_len = err; total_len = err;
cmsg_ptr = (unsigned long)msg_sys.msg_control; cmsg_ptr = (unsigned long)msg_sys->msg_control;
msg_sys.msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
if (sock->file->f_flags & O_NONBLOCK) if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT; flags |= MSG_DONTWAIT;
err = sock_recvmsg(sock, &msg_sys, total_len, flags); err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
total_len, flags);
if (err < 0) if (err < 0)
goto out_freeiov; goto out_freeiov;
len = err; len = err;
if (uaddr != NULL) { if (uaddr != NULL) {
err = move_addr_to_user((struct sockaddr *)&addr, err = move_addr_to_user((struct sockaddr *)&addr,
msg_sys.msg_namelen, uaddr, msg_sys->msg_namelen, uaddr,
uaddr_len); uaddr_len);
if (err < 0) if (err < 0)
goto out_freeiov; goto out_freeiov;
} }
err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
COMPAT_FLAGS(msg)); COMPAT_FLAGS(msg));
if (err) if (err)
goto out_freeiov; goto out_freeiov;
if (MSG_CMSG_COMPAT & flags) if (MSG_CMSG_COMPAT & flags)
err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
&msg_compat->msg_controllen); &msg_compat->msg_controllen);
else else
err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
&msg->msg_controllen); &msg->msg_controllen);
if (err) if (err)
goto out_freeiov; goto out_freeiov;
...@@ -2084,21 +2092,150 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, ...@@ -2084,21 +2092,150 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
out_freeiov: out_freeiov:
if (iov != iovstack) if (iov != iovstack)
sock_kfree_s(sock->sk, iov, iov_size); sock_kfree_s(sock->sk, iov, iov_size);
out_put: out:
return err;
}
/*
* BSD recvmsg interface
*/
SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
unsigned int, flags)
{
int fput_needed, err;
struct msghdr msg_sys;
struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
fput_light(sock->file, fput_needed); fput_light(sock->file, fput_needed);
out: out:
return err; return err;
} }
#ifdef __ARCH_WANT_SYS_SOCKETCALL /*
* Linux recvmmsg interface
*/
/*
 * Common implementation of recvmmsg: receive up to @vlen messages on @fd.
 *
 * @fd:      socket file descriptor
 * @mmsg:    user vector of message headers; msg_len of each filled entry is
 *           set to the result of the corresponding receive
 * @vlen:    number of entries in @mmsg
 * @flags:   receive flags passed to every underlying receive
 * @timeout: kernel copy of the caller's timeout, or NULL for none; updated
 *           in place with the time remaining after each datagram
 *
 * Returns the number of datagrams received.  If none were received, returns
 * the negative error instead (-EINVAL for an invalid timeout).  An error hit
 * after at least one datagram is stored in sk_err (unless it is -EAGAIN) so
 * that it is reported by the next call or by getsockopt(SO_ERROR).
 *
 * NOTE(review): when MSG_CMSG_COMPAT is set the caller passes a vector of
 * struct compat_mmsghdr cast to struct mmsghdr; "++entry" and
 * "&entry->msg_len" below still use the native layout.  This looks wrong
 * for vlen > 1 on compat callers -- confirm and fix together with the
 * compat header.
 */
int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
		   unsigned int flags, struct timespec *timeout)
{
	int fput_needed, err, datagrams;
	struct socket *sock;
	struct mmsghdr __user *entry;
	struct msghdr msg_sys;
	struct timespec end_time;

	if (timeout &&
	    poll_select_set_timeout(&end_time, timeout->tv_sec,
				    timeout->tv_nsec))
		return -EINVAL;

	datagrams = 0;

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		return err;

	/* Report (and consume) any error pending on the socket first. */
	err = sock_error(sock->sk);
	if (err) {
		datagrams = err;
		goto out_put;
	}

	entry = mmsg;

	while (datagrams < vlen) {
		/*
		 * No need to ask LSM for more than the first datagram.
		 */
		err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
				    &msg_sys, flags, datagrams);
		if (err < 0)
			break;
		err = put_user(err, &entry->msg_len);
		if (err)
			break;
		++entry;
		++datagrams;

		if (timeout) {
			/* Remaining time = end_time - now. */
			ktime_get_ts(timeout);
			*timeout = timespec_sub(end_time, *timeout);
			if (timeout->tv_sec < 0) {
				timeout->tv_sec = timeout->tv_nsec = 0;
				break;
			}

			/* Timeout, return less than vlen datagrams */
			if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
				break;
		}

		/* Out of band data, return right away */
		if (msg_sys.msg_flags & MSG_OOB)
			break;
	}

	if (err == 0)
		goto out_put;

	if (datagrams == 0) {
		/* Nothing received: hand the error straight back. */
		datagrams = err;
		goto out_put;
	}

	/*
	 * We may return fewer entries than requested (vlen) if the
	 * sock is non block and there aren't enough datagrams...
	 */
	if (err != -EAGAIN) {
		/*
		 * ... or if recvmsg returns an error after we
		 * received some datagrams, where we record the
		 * error to return on the next call or if the
		 * app asks about it using getsockopt(SO_ERROR).
		 *
		 * This must happen before fput_light() below: once the
		 * file reference is dropped the socket may be freed.
		 */
		sock->sk->sk_err = -err;
	}

out_put:
	fput_light(sock->file, fput_needed);

	return datagrams;
}
/*
 * recvmmsg syscall entry: copy the optional timeout in from user space, run
 * __sys_recvmmsg(), and copy the updated timeout back only when at least one
 * datagram was received.  Returns the __sys_recvmmsg() result, or -EFAULT if
 * either user copy fails.
 */
SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct timespec __user *, timeout)
{
	struct timespec ktimeout;
	int received;

	/* No timeout supplied: nothing to copy in or out. */
	if (timeout == NULL)
		return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);

	if (copy_from_user(&ktimeout, timeout, sizeof(ktimeout)) != 0)
		return -EFAULT;

	received = __sys_recvmmsg(fd, mmsg, vlen, flags, &ktimeout);
	if (received <= 0)
		return received;

	if (copy_to_user(timeout, &ktimeout, sizeof(ktimeout)) != 0)
		return -EFAULT;

	return received;
}
#ifdef __ARCH_WANT_SYS_SOCKETCALL
/* Argument list sizes for sys_socketcall */ /* Argument list sizes for sys_socketcall */
#define AL(x) ((x) * sizeof(unsigned long)) #define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[19]={ static const unsigned char nargs[20] = {
AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
AL(4) AL(4),AL(5)
}; };
#undef AL #undef AL
...@@ -2118,7 +2255,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) ...@@ -2118,7 +2255,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
int err; int err;
unsigned int len; unsigned int len;
if (call < 1 || call > SYS_ACCEPT4) if (call < 1 || call > SYS_RECVMMSG)
return -EINVAL; return -EINVAL;
len = nargs[call]; len = nargs[call];
...@@ -2196,6 +2333,10 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) ...@@ -2196,6 +2333,10 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
case SYS_RECVMSG: case SYS_RECVMSG:
err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
break; break;
case SYS_RECVMMSG:
err = sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
(struct timespec __user *)a[4]);
break;
case SYS_ACCEPT4: case SYS_ACCEPT4:
err = sys_accept4(a0, (struct sockaddr __user *)a1, err = sys_accept4(a0, (struct sockaddr __user *)a1,
(int __user *)a[2], a[3]); (int __user *)a[2], a[3]);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment