Commit ca3f74aa authored by Roland McGrath, committed by Linus Torvalds

[PATCH] waitid system call

This patch adds a new system call `waitid'.  This is a new POSIX call that
subsumes the rest of the wait* family and can do some things the older
calls cannot.  A minor addition is the ability to select what kinds of
status to check for with a mask of independent bits, so you can wait for
just stops and not terminations, for example.  A more significant
improvement is the WNOWAIT flag, which allows for polling child status
without reaping.  This interface fills in a siginfo_t with the same details
that a SIGCHLD for the status change has; some of that info (e.g.  si_uid)
is not available via wait4 or other calls.

I've added a new system call that has the parameter conventions of the
POSIX function because that seems like the cleanest thing.  This patch
includes the actual system call table additions for i386 and x86-64; other
architectures will need to assign the system call number, and 64-bit ones
may need to implement 32-bit compat support for it as I did for x86-64. 
The new features could instead be provided by some new kludge inventions in
the wait4 system call interface (that's what BSD did).  If kludges are
preferable to adding a system call, I can work up something different.

I added a struct rusage field si_rusage to siginfo_t in the SIGCHLD case
(this does not affect the size or layout of the struct).  This is not part
of the POSIX interface, but it makes it so that `waitid' subsumes all the
functionality of `wait4'.  Future kernel ABIs (new arch's or whatnot) can
have only the `waitid' system call and the rest of the wait* family
including wait3 and wait4 can be implemented in user space using waitid.
There is nothing in user space as yet that would make use of the new field.

Most of the new functionality is implemented purely in the waitid system
call itself.  POSIX also provides for the WCONTINUED flag to report when a
child process had been stopped by job control and then resumed with
SIGCONT.  Corresponding to this, a SIGCHLD is now generated when a child
resumes (unless SA_NOCLDSTOP is set), with the value CLD_CONTINUED in
siginfo_t.si_code.  To implement this, some additional bookkeeping is
required in the signal code handling job control stops.

The motivation for this work is to make it possible to implement the POSIX
semantics of the `waitid' function in glibc completely and correctly.  If
changing either the system call interface used to accomplish that, or any
details of the kernel implementation work, would improve the chances of
getting this incorporated, I am more than happy to work through any issues.
Signed-off-by: Roland McGrath <roland@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 4c746d40
......@@ -900,5 +900,6 @@ ENTRY(sys_call_table)
.long sys_mq_notify
.long sys_mq_getsetattr
.long sys_ni_syscall /* reserved for kexec */
.long sys_waitid
syscall_table_size=(.-sys_call_table)
......@@ -74,6 +74,8 @@ int ia32_copy_siginfo_to_user(siginfo_t32 __user *to, siginfo_t *from)
err |= __put_user(from->si_utime, &to->si_utime);
err |= __put_user(from->si_stime, &to->si_stime);
err |= __put_user(from->si_status, &to->si_status);
err |= put_compat_rusage(&from->si_rusage,
&to->si_rusage);
default:
case __SI_KILL >> 16:
err |= __put_user(from->si_uid, &to->si_uid);
......
......@@ -586,6 +586,7 @@ ia32_sys_call_table:
.quad compat_sys_mq_notify
.quad compat_sys_mq_getsetattr
.quad quiet_ni_syscall /* reserved for kexec */
.quad sys32_waitid
/* don't forget to change IA32_NR_syscalls */
ia32_syscall_end:
.rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8
......
......@@ -1151,6 +1151,25 @@ asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp,
return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}
/*
 * 32-bit entry point for waitid: runs sys_waitid() against a siginfo_t on
 * the kernel stack, then converts the result into the caller's siginfo_t32.
 *
 * Returns whatever sys_waitid() returned when it fails or reports nothing;
 * otherwise returns the result of ia32_copy_siginfo_to_user().
 */
asmlinkage long sys32_waitid(int which, compat_pid_t pid,
			     siginfo_t32 __user *uinfo, int options)
{
	mm_segment_t saved_fs;
	siginfo_t kinfo;
	long rc;

	/*
	 * Pre-clear si_signo so we can tell afterwards whether anything was
	 * filled in (presumably sys_waitid() leaves it untouched when there
	 * is no status to report -- TODO confirm against sys_waitid()).
	 */
	kinfo.si_signo = 0;

	/* sys_waitid() expects a __user pointer; lift the limit so &kinfo passes. */
	saved_fs = get_fs();
	set_fs(KERNEL_DS);
	rc = sys_waitid(which, pid, (siginfo_t __user *) &kinfo, options);
	set_fs(saved_fs);

	if (rc < 0)
		return rc;
	if (kinfo.si_signo == 0)
		return rc;

	/*
	 * The high si_code bits must still be clear here.  Tag the record as
	 * __SI_CHLD before conversion (presumably so the copy routine picks
	 * the SIGCHLD field layout -- verify in ia32_copy_siginfo_to_user()).
	 */
	BUG_ON(kinfo.si_code & __SI_MASK);
	kinfo.si_code |= __SI_CHLD;
	return ia32_copy_siginfo_to_user(uinfo, &kinfo);
}
/*
* Some system calls that need sign extended arguments. This could be done by a generic wrapper.
*/
......
......@@ -3,6 +3,7 @@
#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/resource.h>
typedef union sigval {
int sival_int;
......@@ -74,6 +75,7 @@ typedef struct siginfo {
int _status; /* exit code */
clock_t _utime;
clock_t _stime;
struct rusage _rusage;
} _sigchld;
/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
......@@ -105,6 +107,7 @@ typedef struct siginfo {
#define si_status _sifields._sigchld._status
#define si_utime _sifields._sigchld._utime
#define si_stime _sifields._sigchld._stime
#define si_rusage _sifields._sigchld._rusage
#define si_value _sifields._rt._sigval
#define si_int _sifields._rt._sigval.sival_int
#define si_ptr _sifields._rt._sigval.sival_ptr
......
......@@ -289,8 +289,9 @@
#define __NR_mq_notify (__NR_mq_open+4)
#define __NR_mq_getsetattr (__NR_mq_open+5)
#define __NR_sys_kexec_load 283
#define __NR_waitid 284
#define NR_syscalls 284
#define NR_syscalls 285
/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
......
......@@ -56,6 +56,7 @@ typedef struct siginfo {
int _status; /* exit code */
clock_t _utime;
clock_t _stime;
struct rusage _rusage;
} _sigchld;
/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
......
......@@ -115,6 +115,7 @@ typedef struct siginfo32 {
int _status; /* exit code */
compat_clock_t _utime;
compat_clock_t _stime;
struct compat_rusage _rusage;
} _sigchld;
/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
......
......@@ -289,7 +289,8 @@
#define __NR_ia32_mq_notify (__NR_ia32_mq_open+4)
#define __NR_ia32_mq_getsetattr (__NR_ia32_mq_open+5)
#define __NR_ia32_kexec 283
#define __NR_ia32_waitid 284
#define IA32_NR_syscalls 287 /* must be > than biggest syscall! */
#define IA32_NR_syscalls 285 /* must be > than biggest syscall! */
#endif /* _ASM_X86_64_IA32_UNISTD_H_ */
......@@ -554,8 +554,10 @@ __SYSCALL(__NR_mq_notify, sys_mq_notify)
__SYSCALL(__NR_mq_getsetattr, sys_mq_getsetattr)
#define __NR_kexec_load 246
__SYSCALL(__NR_kexec_load, sys_ni_syscall)
#define __NR_waitid (253)
__SYSCALL(__NR_waitid, sys_waitid)
#define __NR_syscall_max __NR_kexec_load
#define __NR_syscall_max __NR_waitid
#ifndef __NO_STUBS
/* user-visible error numbers are in the range -1 - -4095 */
......
......@@ -79,6 +79,8 @@ struct compat_rusage {
compat_long_t ru_nivcsw;
};
extern int put_compat_rusage(const struct rusage *, struct compat_rusage __user *);
struct compat_dirent {
u32 d_ino;
compat_off_t d_off;
......
......@@ -287,6 +287,8 @@ struct signal_struct {
/* thread group stop support, overloads group_exit_code too */
int group_stop_count;
/* 1 if group stopped since last SIGCONT, -1 if SIGCONT since report */
int stop_state;
/* POSIX.1b Interval Timers */
struct list_head posix_timers;
......
......@@ -162,6 +162,8 @@ asmlinkage long sys_exit(int error_code);
asmlinkage void sys_exit_group(int error_code);
asmlinkage long sys_wait4(pid_t pid, unsigned int __user *stat_addr,
int options, struct rusage __user *ru);
asmlinkage long sys_waitid(int which, pid_t pid,
struct siginfo __user *infop, int options);
asmlinkage long sys_waitpid(pid_t pid, unsigned int __user *stat_addr, int options);
asmlinkage long sys_set_tid_address(int __user *tidptr);
asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
......
......@@ -3,11 +3,20 @@
#define WNOHANG 0x00000001
#define WUNTRACED 0x00000002
#define WSTOPPED WUNTRACED
#define WEXITED 0x00000004
#define WCONTINUED 0x00000008
#define WNOWAIT 0x01000000 /* Don't reap, just poll status. */
#define __WNOTHREAD 0x20000000 /* Don't wait on children of other threads in this group */
#define __WALL 0x40000000 /* Wait on all children, regardless of type */
#define __WCLONE 0x80000000 /* Wait only on non-SIGCHLD children */
/* First argument to waitid: */
#define P_ALL 0
#define P_PID 1
#define P_PGID 2
#ifdef __KERNEL__
#include <linux/config.h>
......
......@@ -310,7 +310,7 @@ asmlinkage long compat_sys_getrlimit (unsigned int resource,
return ret;
}
static long put_compat_rusage(struct compat_rusage __user *ru, struct rusage *r)
int put_compat_rusage(const struct rusage *r, struct compat_rusage __user *ru)
{
if (!access_ok(VERIFY_WRITE, ru, sizeof(*ru)) ||
__put_user(r->ru_utime.tv_sec, &ru->ru_utime.tv_sec) ||
......@@ -348,7 +348,7 @@ asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru)
if (ret)
return ret;
if (put_compat_rusage(ru, &r))
if (put_compat_rusage(&r, ru))
return -EFAULT;
return 0;
......@@ -374,7 +374,7 @@ compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options,
set_fs (old_fs);
if (ret > 0) {
if (put_compat_rusage(ru, &r))
if (put_compat_rusage(&r, ru))
return -EFAULT;
if (stat_addr && put_user(status, stat_addr))
return -EFAULT;
......
This diff is collapsed.
......@@ -26,6 +26,8 @@
#include <asm/unistd.h>
#include <asm/siginfo.h>
extern void k_getrusage(struct task_struct *, int, struct rusage *);
/*
* SLAB caches for signal bits.
*/
......@@ -660,6 +662,7 @@ static void handle_stop_signal(int sig, struct task_struct *p)
* the SIGCHLD was pending on entry to this kill.
*/
p->signal->group_stop_count = 0;
p->signal->stop_state = 1;
if (p->ptrace & PT_PTRACED)
do_notify_parent_cldstop(p, p->parent);
else
......@@ -696,6 +699,21 @@ static void handle_stop_signal(int sig, struct task_struct *p)
t = next_thread(t);
} while (t != p);
if (p->signal->stop_state > 0) {
/*
* We were in fact stopped, and are now continued.
* Notify the parent with CLD_CONTINUED.
*/
p->signal->stop_state = -1;
p->signal->group_exit_code = 0;
if (p->ptrace & PT_PTRACED)
do_notify_parent_cldstop(p, p->parent);
else
do_notify_parent_cldstop(
p->group_leader,
p->group_leader->real_parent);
}
}
}
......@@ -1466,6 +1484,7 @@ void do_notify_parent(struct task_struct *tsk, int sig)
/* FIXME: find out whether or not this is supposed to be c*time. */
info.si_utime = tsk->utime;
info.si_stime = tsk->stime;
k_getrusage(tsk, RUSAGE_BOTH, &info.si_rusage);
status = tsk->exit_code & 0x7f;
why = SI_KERNEL; /* shouldn't happen */
......@@ -1555,9 +1574,16 @@ do_notify_parent_cldstop(struct task_struct *tsk, struct task_struct *parent)
/* FIXME: find out whether or not this is supposed to be c*time. */
info.si_utime = tsk->utime;
info.si_stime = tsk->stime;
k_getrusage(tsk, RUSAGE_BOTH, &info.si_rusage);
info.si_status = tsk->exit_code & 0x7f;
info.si_code = CLD_STOPPED;
info.si_status = (tsk->signal ? tsk->signal->group_exit_code :
tsk->exit_code) & 0x7f;
if (info.si_status == 0) {
info.si_status = SIGCONT;
info.si_code = CLD_CONTINUED;
} else {
info.si_code = CLD_STOPPED;
}
sighand = parent->sighand;
spin_lock_irqsave(&sighand->siglock, flags);
......@@ -1623,14 +1649,17 @@ do_signal_stop(int signr)
stop_count = --sig->group_stop_count;
current->exit_code = signr;
set_current_state(TASK_STOPPED);
if (stop_count == 0)
sig->stop_state = 1;
spin_unlock_irq(&sighand->siglock);
}
else if (thread_group_empty(current)) {
/*
* Lock must be held through transition to stopped state.
*/
current->exit_code = signr;
current->exit_code = current->signal->group_exit_code = signr;
set_current_state(TASK_STOPPED);
sig->stop_state = 1;
spin_unlock_irq(&sighand->siglock);
}
else {
......@@ -1696,6 +1725,8 @@ do_signal_stop(int signr)
current->exit_code = signr;
set_current_state(TASK_STOPPED);
if (stop_count == 0)
sig->stop_state = 1;
spin_unlock_irq(&sighand->siglock);
read_unlock(&tasklist_lock);
......@@ -1736,6 +1767,8 @@ static inline int handle_group_stop(void)
* without any associated signal being in our queue.
*/
stop_count = --current->signal->group_stop_count;
if (stop_count == 0)
current->signal->stop_state = 1;
current->exit_code = current->signal->group_exit_code;
set_current_state(TASK_STOPPED);
spin_unlock_irq(&current->sighand->siglock);
......@@ -2098,6 +2131,8 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
err |= __put_user(from->si_status, &to->si_status);
err |= __put_user(from->si_utime, &to->si_utime);
err |= __put_user(from->si_stime, &to->si_stime);
err |= __copy_to_user(&to->si_rusage, &from->si_rusage,
sizeof(to->si_rusage));
break;
case __SI_RT: /* This is not generated by the kernel as of now. */
case __SI_MESGQ: /* But this is */
......
......@@ -1540,37 +1540,43 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
* reaped till shortly after the call to getrusage(), in both cases the
* task being examined is in a frozen state so the counters won't change.
*/
int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
{
struct rusage r;
memset((char *) &r, 0, sizeof(r));
/*
 * Fill the kernel-space buffer *r with resource usage figures for task p.
 * The buffer is zeroed first, so fields not assigned below stay 0.  `who'
 * selects the counters reported: RUSAGE_SELF uses the task's own fields
 * (utime, stime, nvcsw, nivcsw, min_flt, maj_flt), RUSAGE_CHILDREN uses the
 * c-prefixed fields (cutime, cstime, ...), and any other value reports the
 * sum of both sets.
 *
 * NOTE(review): this hunk is rendered without +/- markers.  In each case the
 * group of `r.field' lines appears to be the removed code and the following
 * group of `r->field' lines its replacement -- confirm against the raw diff.
 */
void k_getrusage(struct task_struct *p, int who, struct rusage *r)
{
memset((char *) r, 0, sizeof *r);
switch (who) {
case RUSAGE_SELF:
jiffies_to_timeval(p->utime, &r.ru_utime);
jiffies_to_timeval(p->stime, &r.ru_stime);
r.ru_nvcsw = p->nvcsw;
r.ru_nivcsw = p->nivcsw;
r.ru_minflt = p->min_flt;
r.ru_majflt = p->maj_flt;
jiffies_to_timeval(p->utime, &r->ru_utime);
jiffies_to_timeval(p->stime, &r->ru_stime);
r->ru_nvcsw = p->nvcsw;
r->ru_nivcsw = p->nivcsw;
r->ru_minflt = p->min_flt;
r->ru_majflt = p->maj_flt;
break;
case RUSAGE_CHILDREN:
jiffies_to_timeval(p->cutime, &r.ru_utime);
jiffies_to_timeval(p->cstime, &r.ru_stime);
r.ru_nvcsw = p->cnvcsw;
r.ru_nivcsw = p->cnivcsw;
r.ru_minflt = p->cmin_flt;
r.ru_majflt = p->cmaj_flt;
jiffies_to_timeval(p->cutime, &r->ru_utime);
jiffies_to_timeval(p->cstime, &r->ru_stime);
r->ru_nvcsw = p->cnvcsw;
r->ru_nivcsw = p->cnivcsw;
r->ru_minflt = p->cmin_flt;
r->ru_majflt = p->cmaj_flt;
break;
default:
/*
 * Any other `who' value: own + c-prefixed counters added together.  The
 * signal code in this patch passes RUSAGE_BOTH, which presumably lands
 * here -- TODO confirm.
 */
jiffies_to_timeval(p->utime + p->cutime, &r.ru_utime);
jiffies_to_timeval(p->stime + p->cstime, &r.ru_stime);
r.ru_nvcsw = p->nvcsw + p->cnvcsw;
r.ru_nivcsw = p->nivcsw + p->cnivcsw;
r.ru_minflt = p->min_flt + p->cmin_flt;
r.ru_majflt = p->maj_flt + p->cmaj_flt;
jiffies_to_timeval(p->utime + p->cutime, &r->ru_utime);
jiffies_to_timeval(p->stime + p->cstime, &r->ru_stime);
r->ru_nvcsw = p->nvcsw + p->cnvcsw;
r->ru_nivcsw = p->nivcsw + p->cnivcsw;
r->ru_minflt = p->min_flt + p->cmin_flt;
r->ru_majflt = p->maj_flt + p->cmaj_flt;
break;
}
}
/*
 * Gather the resource usage selected by `who' for task p into a local
 * struct rusage via k_getrusage(), then copy it out to the user buffer ru.
 *
 * Returns 0 on success, or -EFAULT if the copy to user space fails.
 */
int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
{
	struct rusage usage;

	k_getrusage(p, who, &usage);
	if (copy_to_user(ru, &usage, sizeof(usage)))
		return -EFAULT;
	return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment