Commit ec40758b authored by Linus Torvalds

Merge tag 'v6.4/pidfd.file' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux

Pull pidfd updates from Christian Brauner:
 "This adds a new pidfd_prepare() helper which allows the caller to
  reserve a pidfd number and allocates a new pidfd file that stashes the
  provided struct pid.

  Installing a file descriptor into a task's file descriptor table just
  to close it again via close_fd() when an error occurs should be
  avoided: by then the fd has been visible to userspace and might
  already be in use. Instead, a file descriptor should be reserved but
  not installed into the caller's file descriptor table.

  If another failure path is hit then the reserved file descriptor and
  file can just be put without any userspace visible side-effects. And
  if all failure paths are cleared the file descriptor and file can be
  installed into the task's file descriptor table.

  This helper is now used in all places that open coded this
  functionality before. For example, this is currently done during
  copy_process() and fanotify used pidfd_create(), which returns a pidfd
  that has already been made visible in the caller's file descriptor
  table, but then closed it using close_fd().

  In one of the next merge windows there is also new functionality
  coming to unix domain sockets that will have to rely on
  pidfd_prepare()"

* tag 'v6.4/pidfd.file' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux:
  fanotify: use pidfd_prepare()
  fork: use pidfd_prepare()
  pid: add pidfd_prepare()
parents 3323ddce eee3a0e9
...@@ -663,7 +663,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, ...@@ -663,7 +663,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
struct fanotify_info *info = fanotify_event_info(event); struct fanotify_info *info = fanotify_event_info(event);
unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES); unsigned int info_mode = FAN_GROUP_FLAG(group, FANOTIFY_INFO_MODES);
unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD; unsigned int pidfd_mode = info_mode & FAN_REPORT_PIDFD;
struct file *f = NULL; struct file *f = NULL, *pidfd_file = NULL;
int ret, pidfd = FAN_NOPIDFD, fd = FAN_NOFD; int ret, pidfd = FAN_NOPIDFD, fd = FAN_NOFD;
pr_debug("%s: group=%p event=%p\n", __func__, group, event); pr_debug("%s: group=%p event=%p\n", __func__, group, event);
...@@ -718,7 +718,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, ...@@ -718,7 +718,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
!pid_has_task(event->pid, PIDTYPE_TGID)) { !pid_has_task(event->pid, PIDTYPE_TGID)) {
pidfd = FAN_NOPIDFD; pidfd = FAN_NOPIDFD;
} else { } else {
pidfd = pidfd_create(event->pid, 0); pidfd = pidfd_prepare(event->pid, 0, &pidfd_file);
if (pidfd < 0) if (pidfd < 0)
pidfd = FAN_EPIDFD; pidfd = FAN_EPIDFD;
} }
...@@ -751,6 +751,9 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, ...@@ -751,6 +751,9 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
if (f) if (f)
fd_install(fd, f); fd_install(fd, f);
if (pidfd_file)
fd_install(pidfd, pidfd_file);
return metadata.event_len; return metadata.event_len;
out_close_fd: out_close_fd:
...@@ -759,8 +762,10 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, ...@@ -759,8 +762,10 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
fput(f); fput(f);
} }
if (pidfd >= 0) if (pidfd >= 0) {
close_fd(pidfd); put_unused_fd(pidfd);
fput(pidfd_file);
}
return ret; return ret;
} }
......
...@@ -80,6 +80,7 @@ extern struct pid *pidfd_pid(const struct file *file); ...@@ -80,6 +80,7 @@ extern struct pid *pidfd_pid(const struct file *file);
struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
int pidfd_create(struct pid *pid, unsigned int flags); int pidfd_create(struct pid *pid, unsigned int flags);
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret);
static inline struct pid *get_pid(struct pid *pid) static inline struct pid *get_pid(struct pid *pid)
{ {
......
...@@ -1961,6 +1961,91 @@ const struct file_operations pidfd_fops = { ...@@ -1961,6 +1961,91 @@ const struct file_operations pidfd_fops = {
#endif #endif
}; };
/**
* __pidfd_prepare - allocate a new pidfd_file and reserve a pidfd
* @pid: the struct pid for which to create a pidfd
* @flags: flags of the new @pidfd
* @pidfd: the pidfd to return
*
* Allocate a new file that stashes @pid and reserve a new pidfd number in the
* caller's file descriptor table. The pidfd is reserved but not installed yet.
* The helper doesn't perform checks on @pid which makes it useful for pidfds
* created via CLONE_PIDFD where @pid has no task attached when the pidfd and
* pidfd file are prepared.
*
* If this function returns successfully the caller is responsible to either
* call fd_install() passing the returned pidfd and pidfd file as arguments in
* order to install the pidfd into its file descriptor table or they must use
* put_unused_fd() and fput() on the returned pidfd and pidfd file
* respectively.
*
* This function is useful when a pidfd must already be reserved but there
* might still be points of failure afterwards and the caller wants to ensure
* that no pidfd is leaked into its file descriptor table.
*
* Return: On success, a reserved pidfd is returned from the function and a new
* pidfd file is returned in the last argument to the function. On
* error, a negative error code is returned from the function and the
* last argument remains unchanged.
*/
static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
{
int pidfd;
struct file *pidfd_file;
if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC))
return -EINVAL;
pidfd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
if (pidfd < 0)
return pidfd;
pidfd_file = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
flags | O_RDWR | O_CLOEXEC);
if (IS_ERR(pidfd_file)) {
put_unused_fd(pidfd);
return PTR_ERR(pidfd_file);
}
get_pid(pid); /* held by pidfd_file now */
*ret = pidfd_file;
return pidfd;
}
/**
 * pidfd_prepare - reserve a pidfd number and allocate the matching pidfd file
 * @pid:   the struct pid the new pidfd file refers to
 * @flags: flags of the new pidfd file
 * @ret:   where the newly allocated pidfd file is stored on success
 *
 * Checked front end to __pidfd_prepare(): @pid must be non-NULL and must be
 * in use as a thread group leader, otherwise the request is rejected before
 * anything is reserved or allocated.
 *
 * On success a pidfd number has been reserved in the caller's file descriptor
 * table and a new file stashing @pid has been allocated, but the pidfd has
 * not been installed yet. The caller then has to do exactly one of the
 * following:
 *
 *  - fd_install() the returned pidfd together with the returned file, or
 *  - drop them again with put_unused_fd() and fput() respectively.
 *
 * This allows a caller that still has failure points ahead of it to back out
 * without ever having leaked a pidfd into its file descriptor table.
 *
 * Return: the reserved pidfd on success, with the new pidfd file stored in
 *         @ret. A negative error code on failure, in which case @ret is left
 *         untouched.
 */
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
{
	/* There is nothing to refer to without a pid. */
	if (!pid)
		return -EINVAL;

	/* Only pids currently used as a thread group leader qualify. */
	if (!pid_has_task(pid, PIDTYPE_TGID))
		return -EINVAL;

	return __pidfd_prepare(pid, flags, ret);
}
static void __delayed_free_task(struct rcu_head *rhp) static void __delayed_free_task(struct rcu_head *rhp)
{ {
struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
...@@ -2309,21 +2394,12 @@ __latent_entropy struct task_struct *copy_process( ...@@ -2309,21 +2394,12 @@ __latent_entropy struct task_struct *copy_process(
* if the fd table isn't shared). * if the fd table isn't shared).
*/ */
if (clone_flags & CLONE_PIDFD) { if (clone_flags & CLONE_PIDFD) {
retval = get_unused_fd_flags(O_RDWR | O_CLOEXEC); /* Note that no task has been attached to @pid yet. */
retval = __pidfd_prepare(pid, O_RDWR | O_CLOEXEC, &pidfile);
if (retval < 0) if (retval < 0)
goto bad_fork_free_pid; goto bad_fork_free_pid;
pidfd = retval; pidfd = retval;
pidfile = anon_inode_getfile("[pidfd]", &pidfd_fops, pid,
O_RDWR | O_CLOEXEC);
if (IS_ERR(pidfile)) {
put_unused_fd(pidfd);
retval = PTR_ERR(pidfile);
goto bad_fork_free_pid;
}
get_pid(pid); /* held by pidfile now */
retval = put_user(pidfd, args->pidfd); retval = put_user(pidfd, args->pidfd);
if (retval) if (retval)
goto bad_fork_put_pidfd; goto bad_fork_put_pidfd;
......
...@@ -594,20 +594,15 @@ struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags) ...@@ -594,20 +594,15 @@ struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags)
*/ */
int pidfd_create(struct pid *pid, unsigned int flags) int pidfd_create(struct pid *pid, unsigned int flags)
{ {
int fd; int pidfd;
struct file *pidfd_file;
if (!pid || !pid_has_task(pid, PIDTYPE_TGID))
return -EINVAL;
if (flags & ~(O_NONBLOCK | O_RDWR | O_CLOEXEC)) pidfd = pidfd_prepare(pid, flags, &pidfd_file);
return -EINVAL; if (pidfd < 0)
return pidfd;
fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid), fd_install(pidfd, pidfd_file);
flags | O_RDWR | O_CLOEXEC); return pidfd;
if (fd < 0)
put_pid(pid);
return fd;
} }
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment