Commit 449325b5, authored by Alexei Starovoitov, committed by David S. Miller

umh: introduce fork_usermode_blob() helper

Introduce helper:
int fork_usermode_blob(void *data, size_t len, struct umh_info *info);
struct umh_info {
       struct file *pipe_to_umh;
       struct file *pipe_from_umh;
       pid_t pid;
};

that GPLed kernel modules (signed or unsigned) can use to execute part
of their own data as a swappable user mode process.

The kernel will do:
- allocate a unique file in tmpfs
- populate that file with the len bytes starting at data, i.e. [data, data + len)
- user-mode-helper code will do_execve that file and, before the process
  starts, the kernel will create two unix pipes for bidirectional
  communication between kernel module and umh
- close tmpfs file, effectively deleting it
- the fork_usermode_blob will return zero on success and populate
  'struct umh_info' with two unix pipes and the pid of the user process

As the first step in the development of the bpfilter project
the fork_usermode_blob() helper is introduced to allow user mode code
to be invoked from a kernel module. The idea is that user mode code plus
normal kernel module code are built as part of the kernel build
and installed as traditional kernel module into distro specified location,
such that from a distribution point of view, there is
no difference between regular kernel modules and kernel modules + umh code.
Such modules can be signed, modprobed, rmmod, etc. The use of this new helper
by a kernel module doesn't make it any special from kernel and user space
tooling point of view.

Such an approach enables the kernel to delegate functionality traditionally done
by kernel modules to user space processes (either root or !root) and
reduces the security attack surface of the new code. Buggy umh code would crash
the user process, but not the kernel. Another advantage is that umh code
of the kernel module can be debugged and tested out of user space
(e.g. opening the possibility to run clang sanitizers, fuzzers or
user space test suites on the umh code).
In case of the bpfilter project such architecture allows complex control plane
to be done in the user space while bpf based data plane stays in the kernel.

Since a umh can crash, be oom-killed by the kernel, or be killed by the admin,
the kernel module that uses it (like bpfilter) needs to manage the lifetime
of the umh on its own via the two unix pipes and the pid of the umh.

The exit code of such kernel module should kill the umh it started,
so that rmmod of the kernel module will cleanup the corresponding umh.
Just like if the kernel module does kmalloc() it should kfree() it
in the exit code.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 1fe8c06c
...@@ -1706,14 +1706,13 @@ static int exec_binprm(struct linux_binprm *bprm) ...@@ -1706,14 +1706,13 @@ static int exec_binprm(struct linux_binprm *bprm)
/* /*
* sys_execve() executes a new program. * sys_execve() executes a new program.
*/ */
static int do_execveat_common(int fd, struct filename *filename, static int __do_execve_file(int fd, struct filename *filename,
struct user_arg_ptr argv, struct user_arg_ptr argv,
struct user_arg_ptr envp, struct user_arg_ptr envp,
int flags) int flags, struct file *file)
{ {
char *pathbuf = NULL; char *pathbuf = NULL;
struct linux_binprm *bprm; struct linux_binprm *bprm;
struct file *file;
struct files_struct *displaced; struct files_struct *displaced;
int retval; int retval;
...@@ -1752,6 +1751,7 @@ static int do_execveat_common(int fd, struct filename *filename, ...@@ -1752,6 +1751,7 @@ static int do_execveat_common(int fd, struct filename *filename,
check_unsafe_exec(bprm); check_unsafe_exec(bprm);
current->in_execve = 1; current->in_execve = 1;
if (!file)
file = do_open_execat(fd, filename, flags); file = do_open_execat(fd, filename, flags);
retval = PTR_ERR(file); retval = PTR_ERR(file);
if (IS_ERR(file)) if (IS_ERR(file))
...@@ -1760,7 +1760,9 @@ static int do_execveat_common(int fd, struct filename *filename, ...@@ -1760,7 +1760,9 @@ static int do_execveat_common(int fd, struct filename *filename,
sched_exec(); sched_exec();
bprm->file = file; bprm->file = file;
if (fd == AT_FDCWD || filename->name[0] == '/') { if (!filename) {
bprm->filename = "none";
} else if (fd == AT_FDCWD || filename->name[0] == '/') {
bprm->filename = filename->name; bprm->filename = filename->name;
} else { } else {
if (filename->name[0] == '\0') if (filename->name[0] == '\0')
...@@ -1826,6 +1828,7 @@ static int do_execveat_common(int fd, struct filename *filename, ...@@ -1826,6 +1828,7 @@ static int do_execveat_common(int fd, struct filename *filename,
task_numa_free(current); task_numa_free(current);
free_bprm(bprm); free_bprm(bprm);
kfree(pathbuf); kfree(pathbuf);
if (filename)
putname(filename); putname(filename);
if (displaced) if (displaced)
put_files_struct(displaced); put_files_struct(displaced);
...@@ -1849,10 +1852,27 @@ static int do_execveat_common(int fd, struct filename *filename, ...@@ -1849,10 +1852,27 @@ static int do_execveat_common(int fd, struct filename *filename,
if (displaced) if (displaced)
reset_files_struct(displaced); reset_files_struct(displaced);
out_ret: out_ret:
if (filename)
putname(filename); putname(filename);
return retval; return retval;
} }
/*
 * Path-based exec entry point. Forwards all arguments to __do_execve_file()
 * with a NULL file, so that helper opens the executable itself via
 * do_open_execat(fd, filename, flags).
 * Returns whatever __do_execve_file() returns.
 */
static int do_execveat_common(int fd, struct filename *filename,
struct user_arg_ptr argv,
struct user_arg_ptr envp,
int flags)
{
return __do_execve_file(fd, filename, argv, envp, flags, NULL);
}
/*
 * Exec an already-opened file instead of looking one up by path.
 *
 * @file:   the file to execute; passed straight to __do_execve_file()
 * @__argv: argument vector, stored as the native pointer of a user_arg_ptr
 * @__envp: environment vector, stored the same way
 *
 * No filename is supplied (NULL), so __do_execve_file() uses "none" as
 * bprm->filename; fd and flags are fixed to AT_FDCWD and 0.
 * Returns whatever __do_execve_file() returns.
 */
int do_execve_file(struct file *file, void *__argv, void *__envp)
{
/* wrap the raw pointers in the type __do_execve_file() expects */
struct user_arg_ptr argv = { .ptr.native = __argv };
struct user_arg_ptr envp = { .ptr.native = __envp };
return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file);
}
int do_execve(struct filename *filename, int do_execve(struct filename *filename,
const char __user *const __user *__argv, const char __user *const __user *__argv,
const char __user *const __user *__envp) const char __user *const __user *__envp)
......
...@@ -150,5 +150,6 @@ extern int do_execveat(int, struct filename *, ...@@ -150,5 +150,6 @@ extern int do_execveat(int, struct filename *,
const char __user * const __user *, const char __user * const __user *,
const char __user * const __user *, const char __user * const __user *,
int); int);
int do_execve_file(struct file *file, void *__argv, void *__envp);
#endif /* _LINUX_BINFMTS_H */ #endif /* _LINUX_BINFMTS_H */
...@@ -22,8 +22,10 @@ struct subprocess_info { ...@@ -22,8 +22,10 @@ struct subprocess_info {
const char *path; const char *path;
char **argv; char **argv;
char **envp; char **envp;
struct file *file;
int wait; int wait;
int retval; int retval;
pid_t pid;
int (*init)(struct subprocess_info *info, struct cred *new); int (*init)(struct subprocess_info *info, struct cred *new);
void (*cleanup)(struct subprocess_info *info); void (*cleanup)(struct subprocess_info *info);
void *data; void *data;
...@@ -38,6 +40,16 @@ call_usermodehelper_setup(const char *path, char **argv, char **envp, ...@@ -38,6 +40,16 @@ call_usermodehelper_setup(const char *path, char **argv, char **envp,
int (*init)(struct subprocess_info *info, struct cred *new), int (*init)(struct subprocess_info *info, struct cred *new),
void (*cleanup)(struct subprocess_info *), void *data); void (*cleanup)(struct subprocess_info *), void *data);
/*
 * Variant of call_usermodehelper_setup() that describes the helper by an
 * already-open @file rather than by a path. Returns a newly allocated
 * subprocess_info, or NULL if the allocation fails.
 */
struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
int (*init)(struct subprocess_info *info, struct cred *new),
void (*cleanup)(struct subprocess_info *), void *data);
/* Handles filled in by fork_usermode_blob() for the caller to manage. */
struct umh_info {
/* kernel-side end of the pipe whose other end becomes the helper's fd 0 */
struct file *pipe_to_umh;
/* kernel-side end of the pipe whose other end becomes the helper's fd 1 */
struct file *pipe_from_umh;
/* pid of the helper, copied from subprocess_info->pid */
pid_t pid;
};
/*
 * Run the @len bytes at @data as a user mode process and describe it in
 * @info. Returns zero on success or a negative error.
 */
int fork_usermode_blob(void *data, size_t len, struct umh_info *info);
extern int extern int
call_usermodehelper_exec(struct subprocess_info *info, int wait); call_usermodehelper_exec(struct subprocess_info *info, int wait);
......
...@@ -25,6 +25,8 @@ ...@@ -25,6 +25,8 @@
#include <linux/ptrace.h> #include <linux/ptrace.h>
#include <linux/async.h> #include <linux/async.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/shmem_fs.h>
#include <linux/pipe_fs_i.h>
#include <trace/events/module.h> #include <trace/events/module.h>
...@@ -97,6 +99,10 @@ static int call_usermodehelper_exec_async(void *data) ...@@ -97,6 +99,10 @@ static int call_usermodehelper_exec_async(void *data)
commit_creds(new); commit_creds(new);
if (sub_info->file)
retval = do_execve_file(sub_info->file,
sub_info->argv, sub_info->envp);
else
retval = do_execve(getname_kernel(sub_info->path), retval = do_execve(getname_kernel(sub_info->path),
(const char __user *const __user *)sub_info->argv, (const char __user *const __user *)sub_info->argv,
(const char __user *const __user *)sub_info->envp); (const char __user *const __user *)sub_info->envp);
...@@ -185,6 +191,8 @@ static void call_usermodehelper_exec_work(struct work_struct *work) ...@@ -185,6 +191,8 @@ static void call_usermodehelper_exec_work(struct work_struct *work)
if (pid < 0) { if (pid < 0) {
sub_info->retval = pid; sub_info->retval = pid;
umh_complete(sub_info); umh_complete(sub_info);
} else {
sub_info->pid = pid;
} }
} }
} }
...@@ -393,6 +401,117 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv, ...@@ -393,6 +401,117 @@ struct subprocess_info *call_usermodehelper_setup(const char *path, char **argv,
} }
EXPORT_SYMBOL(call_usermodehelper_setup); EXPORT_SYMBOL(call_usermodehelper_setup);
/**
 * call_usermodehelper_setup_file - prepare a helper that executes an open file
 * @file: file to execute; the pointer is stored as-is
 * @init: callback stored in the returned structure's ->init
 * @cleanup: callback stored in the returned structure's ->cleanup
 * @data: opaque pointer stored in the returned structure's ->data
 *
 * Allocates a zeroed subprocess_info with GFP_KERNEL, records the arguments
 * in it, uses the placeholder "none" as its path and initialises its work
 * item to run call_usermodehelper_exec_work().
 *
 * Returns the new structure, or NULL if the allocation fails.
 */
struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
		int (*init)(struct subprocess_info *info, struct cred *new),
		void (*cleanup)(struct subprocess_info *info), void *data)
{
	struct subprocess_info *helper = kzalloc(sizeof(*helper), GFP_KERNEL);

	if (helper) {
		helper->file = file;
		/* there is no real path for a file-backed helper */
		helper->path = "none";
		helper->init = init;
		helper->cleanup = cleanup;
		helper->data = data;
		INIT_WORK(&helper->work, call_usermodehelper_exec_work);
	}

	return helper;
}
/*
 * Init callback passed by fork_usermode_blob(): creates two pipes, installs
 * one end of each as fd 0 and fd 1, and stores the opposite ends in the
 * struct umh_info that info->data points to.
 * NOTE(review): presumably runs in the helper task before it execs, so the
 * fd table touched by replace_fd() is the helper's -- confirm against the
 * code that invokes ->init.
 *
 * @info: subprocess description; info->data must point to a struct umh_info
 * @new:  unused here
 *
 * Returns 0 on success, otherwise the error from create_pipe_files() or
 * replace_fd(). On every error path the files obtained so far are fput()
 * again, and fd 0 is reset if it had already been replaced.
 */
static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
{
struct umh_info *umh_info = info->data;
struct file *from_umh[2];
struct file *to_umh[2];
int err;
/* create pipe to send data to umh */
err = create_pipe_files(to_umh, 0);
if (err)
return err;
/*
 * to_umh[0] becomes fd 0. The local reference is dropped whether or not
 * replace_fd() succeeded (presumably the fd table keeps its own on
 * success -- verify against replace_fd()).
 */
err = replace_fd(0, to_umh[0], 0);
fput(to_umh[0]);
if (err < 0) {
/* fd 0 was not installed: only the other end is left to release */
fput(to_umh[1]);
return err;
}
/* create pipe to receive data from umh */
err = create_pipe_files(from_umh, 0);
if (err) {
/* undo the first pipe: release our end and reset fd 0 */
fput(to_umh[1]);
replace_fd(0, NULL, 0);
return err;
}
/* from_umh[1] becomes fd 1; local reference dropped as for fd 0 above */
err = replace_fd(1, from_umh[1], 0);
fput(from_umh[1]);
if (err < 0) {
/* undo both pipes: release both kept ends and reset fd 0 */
fput(to_umh[1]);
replace_fd(0, NULL, 0);
fput(from_umh[0]);
return err;
}
/* hand the ends that were not installed as fds back to the caller */
umh_info->pipe_to_umh = to_umh[1];
umh_info->pipe_from_umh = from_umh[0];
return 0;
}
/*
 * Cleanup callback passed by fork_usermode_blob(): copies the pid recorded
 * in @info into the struct umh_info that info->data points to.
 */
static void umh_save_pid(struct subprocess_info *info)
{
	((struct umh_info *)info->data)->pid = info->pid;
}
/**
 * fork_usermode_blob - fork a blob of bytes as a usermode process
 * @data: a blob of bytes that can be do_execv-ed as a file
 * @len: length of the blob
 * @info: information about usermode process (shouldn't be NULL)
 *
 * The blob is copied into a temporary shmem file, which is then executed
 * through the usermode-helper machinery with UMH_WAIT_EXEC. The reference
 * to the temporary file held here is dropped before returning.
 *
 * Returns either negative error or zero which indicates success
 * in executing a blob of bytes as a usermode process. In such
 * case 'struct umh_info *info' is populated with two pipes
 * and a pid of the process. The caller is responsible for health
 * check of the user process, killing it via pid, and closing the
 * pipes when user process is no longer needed.
 *
 * On failure no pipe references are left behind: both pipe pointers in
 * @info are NULL when an error is returned.
 */
int fork_usermode_blob(void *data, size_t len, struct umh_info *info)
{
	struct subprocess_info *sub_info;
	struct file *file;
	ssize_t written;
	loff_t pos = 0;
	int err;

	/*
	 * Start from a known state so the failure path below can tell whether
	 * umh_pipe_setup() got far enough to publish the pipes in @info.
	 */
	info->pipe_to_umh = NULL;
	info->pipe_from_umh = NULL;

	file = shmem_kernel_file_setup("", len, 0);
	if (IS_ERR(file))
		return PTR_ERR(file);

	written = kernel_write(file, data, len, &pos);
	if (written != len) {
		/* propagate a write error; report a short write as -ENOMEM */
		err = written;
		if (err >= 0)
			err = -ENOMEM;
		goto out;
	}

	err = -ENOMEM;
	sub_info = call_usermodehelper_setup_file(file, umh_pipe_setup,
						  umh_save_pid, info);
	if (!sub_info)
		goto out;

	err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
	if (err) {
		/*
		 * umh_pipe_setup() may already have stored the kernel-side
		 * pipe ends in @info before the exec failed. The caller only
		 * sees an error and will not close them, so drop them here.
		 */
		if (info->pipe_to_umh) {
			fput(info->pipe_to_umh);
			info->pipe_to_umh = NULL;
		}
		if (info->pipe_from_umh) {
			fput(info->pipe_from_umh);
			info->pipe_from_umh = NULL;
		}
	}
out:
	fput(file);
	return err;
}
EXPORT_SYMBOL_GPL(fork_usermode_blob);
/** /**
* call_usermodehelper_exec - start a usermode application * call_usermodehelper_exec - start a usermode application
* @sub_info: information about the subprocessa * @sub_info: information about the subprocessa
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment