Commit 34e1169d authored by Kees Cook, committed by Rusty Russell

module: add syscall to load module from fd

As part of the effort to create a stronger boundary between root and
kernel, Chrome OS wants to be able to enforce that kernel modules are
being loaded only from our read-only crypto-hash verified (dm_verity)
root filesystem. Since the init_module syscall hands the kernel a module
as a memory blob, no reasoning about the origin of the blob can be made.

Earlier proposals for appending signatures to kernel modules would not be
useful in Chrome OS, since it would involve adding an additional set of
keys to our kernel and builds for no good reason: we already trust the
contents of our root filesystem. We don't need to verify those kernel
modules a second time. Having to do signature checking on module loading
would slow us down and be redundant. All we need to know is where a
module is coming from so we can say yes/no to loading it.

If a file descriptor is used as the source of a kernel module, many more
things can be reasoned about. In Chrome OS's case, we could enforce that
the module lives on the filesystem we expect it to live on.  In the case
of IMA (or other LSMs), it would be possible, for example, to examine
extended attributes that may contain signatures over the contents of
the module.

This introduces a new syscall (on x86), similar to init_module, that has
only two arguments. The first argument is used as a file descriptor to
the module and the second argument is a pointer to the NULL terminated
string of module arguments.
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (merge fixes)
parent 84ecfd15
...@@ -356,3 +356,4 @@ ...@@ -356,3 +356,4 @@
347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv 347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
349 i386 kcmp sys_kcmp 349 i386 kcmp sys_kcmp
350 i386 finit_module sys_finit_module
...@@ -319,6 +319,7 @@ ...@@ -319,6 +319,7 @@
310 64 process_vm_readv sys_process_vm_readv 310 64 process_vm_readv sys_process_vm_readv
311 64 process_vm_writev sys_process_vm_writev 311 64 process_vm_writev sys_process_vm_writev
312 common kcmp sys_kcmp 312 common kcmp sys_kcmp
313 common finit_module sys_finit_module
# #
# x32-specific system call numbers start at 512 to avoid cache impact # x32-specific system call numbers start at 512 to avoid cache impact
......
...@@ -868,4 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid, ...@@ -868,4 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
unsigned long idx1, unsigned long idx2); unsigned long idx1, unsigned long idx2);
asmlinkage long sys_finit_module(int fd, const char __user *uargs);
#endif #endif
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <linux/ftrace_event.h> #include <linux/ftrace_event.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/kallsyms.h> #include <linux/kallsyms.h>
#include <linux/file.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/sysfs.h> #include <linux/sysfs.h>
#include <linux/kernel.h> #include <linux/kernel.h>
...@@ -2425,18 +2426,17 @@ static inline void kmemleak_load_module(const struct module *mod, ...@@ -2425,18 +2426,17 @@ static inline void kmemleak_load_module(const struct module *mod,
#endif #endif
#ifdef CONFIG_MODULE_SIG #ifdef CONFIG_MODULE_SIG
static int module_sig_check(struct load_info *info, static int module_sig_check(struct load_info *info)
const void *mod, unsigned long *_len)
{ {
int err = -ENOKEY; int err = -ENOKEY;
unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
unsigned long len = *_len; const void *mod = info->hdr;
if (len > markerlen && if (info->len > markerlen &&
memcmp(mod + len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
/* We truncate the module to discard the signature */ /* We truncate the module to discard the signature */
*_len -= markerlen; info->len -= markerlen;
err = mod_verify_sig(mod, _len); err = mod_verify_sig(mod, &info->len);
} }
if (!err) { if (!err) {
...@@ -2454,59 +2454,97 @@ static int module_sig_check(struct load_info *info, ...@@ -2454,59 +2454,97 @@ static int module_sig_check(struct load_info *info,
return err; return err;
} }
#else /* !CONFIG_MODULE_SIG */ #else /* !CONFIG_MODULE_SIG */
static int module_sig_check(struct load_info *info, static int module_sig_check(struct load_info *info)
void *mod, unsigned long *len)
{ {
return 0; return 0;
} }
#endif /* !CONFIG_MODULE_SIG */ #endif /* !CONFIG_MODULE_SIG */
/* Sets info->hdr, info->len and info->sig_ok. */ /* Sanity checks against invalid binaries, wrong arch, weird elf version. */
static int copy_and_check(struct load_info *info, static int elf_header_check(struct load_info *info)
const void __user *umod, unsigned long len,
const char __user *uargs)
{ {
int err; if (info->len < sizeof(*(info->hdr)))
Elf_Ehdr *hdr; return -ENOEXEC;
if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0
|| info->hdr->e_type != ET_REL
|| !elf_check_arch(info->hdr)
|| info->hdr->e_shentsize != sizeof(Elf_Shdr))
return -ENOEXEC;
if (len < sizeof(*hdr)) if (info->hdr->e_shoff >= info->len
|| (info->hdr->e_shnum * sizeof(Elf_Shdr) >
info->len - info->hdr->e_shoff))
return -ENOEXEC;
return 0;
}
/* Sets info->hdr and info->len. */
static int copy_module_from_user(const void __user *umod, unsigned long len,
struct load_info *info)
{
info->len = len;
if (info->len < sizeof(*(info->hdr)))
return -ENOEXEC; return -ENOEXEC;
/* Suck in entire file: we'll want most of it. */ /* Suck in entire file: we'll want most of it. */
if ((hdr = vmalloc(len)) == NULL) info->hdr = vmalloc(info->len);
if (!info->hdr)
return -ENOMEM; return -ENOMEM;
if (copy_from_user(hdr, umod, len) != 0) { if (copy_from_user(info->hdr, umod, info->len) != 0) {
err = -EFAULT; vfree(info->hdr);
goto free_hdr; return -EFAULT;
} }
err = module_sig_check(info, hdr, &len); return 0;
}
/* Sets info->hdr and info->len. */
static int copy_module_from_fd(int fd, struct load_info *info)
{
struct file *file;
int err;
struct kstat stat;
loff_t pos;
ssize_t bytes = 0;
file = fget(fd);
if (!file)
return -ENOEXEC;
err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
if (err) if (err)
goto free_hdr; goto out;
/* Sanity checks against insmoding binaries or wrong arch, if (stat.size > INT_MAX) {
weird elf version */ err = -EFBIG;
if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0 goto out;
|| hdr->e_type != ET_REL
|| !elf_check_arch(hdr)
|| hdr->e_shentsize != sizeof(Elf_Shdr)) {
err = -ENOEXEC;
goto free_hdr;
} }
info->hdr = vmalloc(stat.size);
if (hdr->e_shoff >= len || if (!info->hdr) {
hdr->e_shnum * sizeof(Elf_Shdr) > len - hdr->e_shoff) { err = -ENOMEM;
err = -ENOEXEC; goto out;
goto free_hdr;
} }
info->hdr = hdr; pos = 0;
info->len = len; while (pos < stat.size) {
return 0; bytes = kernel_read(file, pos, (char *)(info->hdr) + pos,
stat.size - pos);
if (bytes < 0) {
vfree(info->hdr);
err = bytes;
goto out;
}
if (bytes == 0)
break;
pos += bytes;
}
info->len = pos;
free_hdr: out:
vfree(hdr); fput(file);
return err; return err;
} }
...@@ -2945,33 +2983,123 @@ static bool finished_loading(const char *name) ...@@ -2945,33 +2983,123 @@ static bool finished_loading(const char *name)
return ret; return ret;
} }
/*
 * Invoke each entry of mod->ctors, from index 0 up to mod->num_ctors - 1.
 * Compiles to an empty function when CONFIG_CONSTRUCTORS is not set.
 */
static void do_mod_ctors(struct module *mod)
{
#ifdef CONFIG_CONSTRUCTORS
	unsigned long idx = 0;

	while (idx < mod->num_ctors) {
		mod->ctors[idx]();
		idx++;
	}
#endif
}
/*
 * This is where the real work happens: run the init sequence of an
 * already-loaded module.
 *
 * Steps, in order: announce MODULE_STATE_COMING on module_notify_list,
 * apply RO/NX settings to the core and init regions, call the module's
 * constructors, then call mod->init (if it is non-NULL) through
 * do_one_initcall().
 *
 * Returns 0 on success.  If the init routine returns a negative value,
 * the module is torn down and freed here and that value is returned.
 * A positive return from the init routine is only warned about; the
 * module is still brought live.
 */
static int do_init_module(struct module *mod)
{
/* Stays 0 when the module has no init routine, which counts as success. */
int ret = 0;
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_COMING, mod);
/* Set RO and NX regions for core */
set_section_ro_nx(mod->module_core,
mod->core_text_size,
mod->core_ro_size,
mod->core_size);
/* Set RO and NX regions for init */
set_section_ro_nx(mod->module_init,
mod->init_text_size,
mod->init_ro_size,
mod->init_size);
do_mod_ctors(mod);
/* Start the module */
if (mod->init != NULL)
ret = do_one_initcall(mod->init);
if (ret < 0) {
/* Init routine failed: abort. Try to protect us from
buggy refcounters. */
mod->state = MODULE_STATE_GOING;
synchronize_sched();
module_put(mod);
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_GOING, mod);
free_module(mod);
/* Wake anything sleeping on module_wq now that this load has ended. */
wake_up_all(&module_wq);
return ret;
}
/* A positive value breaks the 0/-E convention: warn, dump the stack, carry on. */
if (ret > 0) {
printk(KERN_WARNING
"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
"%s: loading module anyway...\n",
__func__, mod->name, ret,
__func__);
dump_stack();
}
/* Now it's a first class citizen! */
mod->state = MODULE_STATE_LIVE;
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_LIVE, mod);
/* We need to finish all async code before the module init sequence is done */
async_synchronize_full();
/* The remaining cleanup of init-only state runs under module_mutex. */
mutex_lock(&module_mutex);
/* Drop initial reference. */
module_put(mod);
trim_init_extable(mod);
#ifdef CONFIG_KALLSYMS
/* Switch the symbol and string tables over to the core_* copies. */
/* NOTE(review): presumably because the originals live in the init region freed below - confirm. */
mod->num_symtab = mod->core_num_syms;
mod->symtab = mod->core_symtab;
mod->strtab = mod->core_strtab;
#endif
/* Release the init region and clear every field that described it. */
unset_module_init_ro_nx(mod);
module_free(mod, mod->module_init);
mod->module_init = NULL;
mod->init_size = 0;
mod->init_ro_size = 0;
mod->init_text_size = 0;
mutex_unlock(&module_mutex);
wake_up_all(&module_wq);
return 0;
}
/*
 * Permission gate shared by the module-loading syscalls.
 * Returns 0 when the caller has CAP_SYS_MODULE and modules_disabled is
 * clear; returns -EPERM otherwise.
 */
static int may_init_module(void)
{
	/* capable() is evaluated first; modules_disabled is only read if it passes. */
	return (capable(CAP_SYS_MODULE) && !modules_disabled) ? 0 : -EPERM;
}
/* Allocate and load the module: note that size of section 0 is always /* Allocate and load the module: note that size of section 0 is always
zero, and we rely on this for optional sections. */ zero, and we rely on this for optional sections. */
static struct module *load_module(void __user *umod, static int load_module(struct load_info *info, const char __user *uargs)
unsigned long len,
const char __user *uargs)
{ {
struct load_info info = { NULL, };
struct module *mod, *old; struct module *mod, *old;
long err; long err;
pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n", err = module_sig_check(info);
umod, len, uargs); if (err)
goto free_copy;
/* Copy in the blobs from userspace, check they are vaguely sane. */ err = elf_header_check(info);
err = copy_and_check(&info, umod, len, uargs);
if (err) if (err)
return ERR_PTR(err); goto free_copy;
/* Figure out module layout, and allocate all the memory. */ /* Figure out module layout, and allocate all the memory. */
mod = layout_and_allocate(&info); mod = layout_and_allocate(info);
if (IS_ERR(mod)) { if (IS_ERR(mod)) {
err = PTR_ERR(mod); err = PTR_ERR(mod);
goto free_copy; goto free_copy;
} }
#ifdef CONFIG_MODULE_SIG #ifdef CONFIG_MODULE_SIG
mod->sig_ok = info.sig_ok; mod->sig_ok = info->sig_ok;
if (!mod->sig_ok) if (!mod->sig_ok)
add_taint_module(mod, TAINT_FORCED_MODULE); add_taint_module(mod, TAINT_FORCED_MODULE);
#endif #endif
...@@ -2983,25 +3111,25 @@ static struct module *load_module(void __user *umod, ...@@ -2983,25 +3111,25 @@ static struct module *load_module(void __user *umod,
/* Now we've got everything in the final locations, we can /* Now we've got everything in the final locations, we can
* find optional sections. */ * find optional sections. */
find_module_sections(mod, &info); find_module_sections(mod, info);
err = check_module_license_and_versions(mod); err = check_module_license_and_versions(mod);
if (err) if (err)
goto free_unload; goto free_unload;
/* Set up MODINFO_ATTR fields */ /* Set up MODINFO_ATTR fields */
setup_modinfo(mod, &info); setup_modinfo(mod, info);
/* Fix up syms, so that st_value is a pointer to location. */ /* Fix up syms, so that st_value is a pointer to location. */
err = simplify_symbols(mod, &info); err = simplify_symbols(mod, info);
if (err < 0) if (err < 0)
goto free_modinfo; goto free_modinfo;
err = apply_relocations(mod, &info); err = apply_relocations(mod, info);
if (err < 0) if (err < 0)
goto free_modinfo; goto free_modinfo;
err = post_relocation(mod, &info); err = post_relocation(mod, info);
if (err < 0) if (err < 0)
goto free_modinfo; goto free_modinfo;
...@@ -3041,14 +3169,14 @@ static struct module *load_module(void __user *umod, ...@@ -3041,14 +3169,14 @@ static struct module *load_module(void __user *umod,
} }
/* This has to be done once we're sure module name is unique. */ /* This has to be done once we're sure module name is unique. */
dynamic_debug_setup(info.debug, info.num_debug); dynamic_debug_setup(info->debug, info->num_debug);
/* Find duplicate symbols */ /* Find duplicate symbols */
err = verify_export_symbols(mod); err = verify_export_symbols(mod);
if (err < 0) if (err < 0)
goto ddebug; goto ddebug;
module_bug_finalize(info.hdr, info.sechdrs, mod); module_bug_finalize(info->hdr, info->sechdrs, mod);
list_add_rcu(&mod->list, &modules); list_add_rcu(&mod->list, &modules);
mutex_unlock(&module_mutex); mutex_unlock(&module_mutex);
...@@ -3059,16 +3187,17 @@ static struct module *load_module(void __user *umod, ...@@ -3059,16 +3187,17 @@ static struct module *load_module(void __user *umod,
goto unlink; goto unlink;
/* Link in to syfs. */ /* Link in to syfs. */
err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp); err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp);
if (err < 0) if (err < 0)
goto unlink; goto unlink;
/* Get rid of temporary copy. */ /* Get rid of temporary copy. */
free_copy(&info); free_copy(info);
/* Done! */ /* Done! */
trace_module_load(mod); trace_module_load(mod);
return mod;
return do_init_module(mod);
unlink: unlink:
mutex_lock(&module_mutex); mutex_lock(&module_mutex);
...@@ -3077,7 +3206,7 @@ static struct module *load_module(void __user *umod, ...@@ -3077,7 +3206,7 @@ static struct module *load_module(void __user *umod,
module_bug_cleanup(mod); module_bug_cleanup(mod);
wake_up_all(&module_wq); wake_up_all(&module_wq);
ddebug: ddebug:
dynamic_debug_remove(info.debug); dynamic_debug_remove(info->debug);
unlock: unlock:
mutex_unlock(&module_mutex); mutex_unlock(&module_mutex);
synchronize_sched(); synchronize_sched();
...@@ -3089,106 +3218,48 @@ static struct module *load_module(void __user *umod, ...@@ -3089,106 +3218,48 @@ static struct module *load_module(void __user *umod,
free_unload: free_unload:
module_unload_free(mod); module_unload_free(mod);
free_module: free_module:
module_deallocate(mod, &info); module_deallocate(mod, info);
free_copy: free_copy:
free_copy(&info); free_copy(info);
return ERR_PTR(err); return err;
}
/* Call module constructors. */
static void do_mod_ctors(struct module *mod)
{
#ifdef CONFIG_CONSTRUCTORS
unsigned long i;
for (i = 0; i < mod->num_ctors; i++)
mod->ctors[i]();
#endif
} }
/* This is where the real work happens */
SYSCALL_DEFINE3(init_module, void __user *, umod, SYSCALL_DEFINE3(init_module, void __user *, umod,
unsigned long, len, const char __user *, uargs) unsigned long, len, const char __user *, uargs)
{ {
struct module *mod; int err;
int ret = 0; struct load_info info = { };
/* Must have permission */
if (!capable(CAP_SYS_MODULE) || modules_disabled)
return -EPERM;
/* Do all the hard work */ err = may_init_module();
mod = load_module(umod, len, uargs); if (err)
if (IS_ERR(mod)) return err;
return PTR_ERR(mod);
blocking_notifier_call_chain(&module_notify_list, pr_debug("init_module: umod=%p, len=%lu, uargs=%p\n",
MODULE_STATE_COMING, mod); umod, len, uargs);
/* Set RO and NX regions for core */ err = copy_module_from_user(umod, len, &info);
set_section_ro_nx(mod->module_core, if (err)
mod->core_text_size, return err;
mod->core_ro_size,
mod->core_size);
/* Set RO and NX regions for init */ return load_module(&info, uargs);
set_section_ro_nx(mod->module_init, }
mod->init_text_size,
mod->init_ro_size,
mod->init_size);
do_mod_ctors(mod); SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs)
/* Start the module */ {
if (mod->init != NULL) int err;
ret = do_one_initcall(mod->init); struct load_info info = { };
if (ret < 0) {
/* Init routine failed: abort. Try to protect us from
buggy refcounters. */
mod->state = MODULE_STATE_GOING;
synchronize_sched();
module_put(mod);
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_GOING, mod);
free_module(mod);
wake_up_all(&module_wq);
return ret;
}
if (ret > 0) {
printk(KERN_WARNING
"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
"%s: loading module anyway...\n",
__func__, mod->name, ret,
__func__);
dump_stack();
}
/* Now it's a first class citizen! */ err = may_init_module();
mod->state = MODULE_STATE_LIVE; if (err)
blocking_notifier_call_chain(&module_notify_list, return err;
MODULE_STATE_LIVE, mod);
/* We need to finish all async code before the module init sequence is done */ pr_debug("finit_module: fd=%d, uargs=%p\n", fd, uargs);
async_synchronize_full();
mutex_lock(&module_mutex); err = copy_module_from_fd(fd, &info);
/* Drop initial reference. */ if (err)
module_put(mod); return err;
trim_init_extable(mod);
#ifdef CONFIG_KALLSYMS
mod->num_symtab = mod->core_num_syms;
mod->symtab = mod->core_symtab;
mod->strtab = mod->core_strtab;
#endif
unset_module_init_ro_nx(mod);
module_free(mod, mod->module_init);
mod->module_init = NULL;
mod->init_size = 0;
mod->init_ro_size = 0;
mod->init_text_size = 0;
mutex_unlock(&module_mutex);
wake_up_all(&module_wq);
return 0; return load_module(&info, uargs);
} }
static inline int within(unsigned long addr, void *start, unsigned long size) static inline int within(unsigned long addr, void *start, unsigned long size)
......
...@@ -25,6 +25,7 @@ cond_syscall(sys_swapoff); ...@@ -25,6 +25,7 @@ cond_syscall(sys_swapoff);
cond_syscall(sys_kexec_load); cond_syscall(sys_kexec_load);
cond_syscall(compat_sys_kexec_load); cond_syscall(compat_sys_kexec_load);
cond_syscall(sys_init_module); cond_syscall(sys_init_module);
cond_syscall(sys_finit_module);
cond_syscall(sys_delete_module); cond_syscall(sys_delete_module);
cond_syscall(sys_socketpair); cond_syscall(sys_socketpair);
cond_syscall(sys_bind); cond_syscall(sys_bind);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment