Commit d949d0ec authored by Andrea Arcangeli's avatar Andrea Arcangeli Committed by Linus Torvalds

[PATCH] seccomp: secure computing support

I'd need it merged into mainline at some point, unless anybody has strong
arguments against it.  All I can guarantee here, is that I'll back it out
myself in the future, iff Cpushare will fail and nobody else started using
it in the meantime for similar security purposes.

(akpm: project details are at http://www.cpushare.com/technical.  It seems
like a good idea to me, and one which is worth supporting.  I agree that for
this to be successful, the added robustness of Andrea's simple and specific
jail is worthwhile).
Signed-off-by: default avatarAndrea Arcangeli <andrea@cpushare.com>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent abbaf4f1
......@@ -896,6 +896,23 @@ config REGPARM
generate incorrect output with certain kernel constructs when
-mregparm=3 is used.
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
default y
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
endmenu
......
......@@ -219,7 +219,8 @@ sysenter_past_esp:
SAVE_ALL
GET_THREAD_INFO(%ebp)
testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
jnz syscall_trace_entry
cmpl $(nr_syscalls), %eax
jae syscall_badsys
......@@ -243,7 +244,8 @@ ENTRY(system_call)
SAVE_ALL
GET_THREAD_INFO(%ebp)
# system call tracing in operation
testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp)
jnz syscall_trace_entry
cmpl $(nr_syscalls), %eax
jae syscall_badsys
......
......@@ -15,6 +15,7 @@
#include <linux/user.h>
#include <linux/security.h>
#include <linux/audit.h>
#include <linux/seccomp.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
......@@ -678,6 +679,9 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
__attribute__((regparm(3)))
void do_syscall_trace(struct pt_regs *regs, int entryexit)
{
/* do the secure computing check first */
secure_computing(regs->orig_eax);
if (unlikely(current->audit_context)) {
if (!entryexit)
audit_syscall_entry(current, regs->orig_eax,
......
......@@ -350,6 +350,24 @@ config X86_MCE_INTEL
help
Additional support for intel specific MCE features such as
the thermal monitor.
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
depends on PROC_FS
default y
help
This kernel feature is useful for number crunching applications
that may need to compute untrusted bytecode during their
execution. By using pipes or other transports made available to
the process as file descriptors supporting the read/write
syscalls, it's possible to isolate those applications in
their own address space using seccomp. Once seccomp is
enabled via /proc/<pid>/seccomp, it cannot be disabled
and the task is only allowed to execute a few safe syscalls
defined by each seccomp mode.
If unsure, say Y. Only embedded should say N here.
endmenu
#
......
......@@ -78,7 +78,7 @@ ENTRY(ia32_sysenter_target)
.quad 1b,ia32_badarg
.previous
GET_THREAD_INFO(%r10)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
jnz sysenter_tracesys
sysenter_do_call:
cmpl $(IA32_NR_syscalls),%eax
......@@ -163,7 +163,7 @@ ENTRY(ia32_cstar_target)
.quad 1b,ia32_badarg
.previous
GET_THREAD_INFO(%r10)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
jnz cstar_tracesys
cstar_do_call:
cmpl $IA32_NR_syscalls,%eax
......@@ -236,7 +236,7 @@ ENTRY(ia32_syscall)
this could be a problem. */
SAVE_ARGS 0,0,1
GET_THREAD_INFO(%r10)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
jnz ia32_tracesys
ia32_do_syscall:
cmpl $(IA32_NR_syscalls),%eax
......
......@@ -184,7 +184,7 @@ ENTRY(system_call)
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
jnz tracesys
cmpq $__NR_syscall_max,%rax
ja badsys
......
......@@ -17,6 +17,7 @@
#include <linux/user.h>
#include <linux/security.h>
#include <linux/audit.h>
#include <linux/seccomp.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
......@@ -521,6 +522,9 @@ static void syscall_trace(struct pt_regs *regs)
asmlinkage void syscall_trace_enter(struct pt_regs *regs)
{
/* do the secure computing check first */
secure_computing(regs->orig_rax);
if (unlikely(current->audit_context))
audit_syscall_entry(current, regs->orig_rax,
regs->rdi, regs->rsi,
......
......@@ -32,6 +32,7 @@
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/seccomp.h>
#include "internal.h"
/*
......@@ -49,6 +50,9 @@ enum pid_directory_inos {
PROC_TGID_TASK,
PROC_TGID_STATUS,
PROC_TGID_MEM,
#ifdef CONFIG_SECCOMP
PROC_TGID_SECCOMP,
#endif
PROC_TGID_CWD,
PROC_TGID_ROOT,
PROC_TGID_EXE,
......@@ -80,6 +84,9 @@ enum pid_directory_inos {
PROC_TID_INO,
PROC_TID_STATUS,
PROC_TID_MEM,
#ifdef CONFIG_SECCOMP
PROC_TID_SECCOMP,
#endif
PROC_TID_CWD,
PROC_TID_ROOT,
PROC_TID_EXE,
......@@ -130,6 +137,9 @@ static struct pid_entry tgid_base_stuff[] = {
E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO),
E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO),
E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
#ifdef CONFIG_SECCOMP
E(PROC_TGID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
#endif
E(PROC_TGID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
E(PROC_TGID_ROOT, "root", S_IFLNK|S_IRWXUGO),
E(PROC_TGID_EXE, "exe", S_IFLNK|S_IRWXUGO),
......@@ -160,6 +170,9 @@ static struct pid_entry tid_base_stuff[] = {
E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO),
E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO),
E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR),
#ifdef CONFIG_SECCOMP
E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
#endif
E(PROC_TID_CWD, "cwd", S_IFLNK|S_IRWXUGO),
E(PROC_TID_ROOT, "root", S_IFLNK|S_IRWXUGO),
E(PROC_TID_EXE, "exe", S_IFLNK|S_IRWXUGO),
......@@ -808,6 +821,61 @@ static struct file_operations proc_loginuid_operations = {
};
#endif
#ifdef CONFIG_SECCOMP
static ssize_t seccomp_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
struct task_struct *tsk = proc_task(file->f_dentry->d_inode);
char __buf[20];
loff_t __ppos = *ppos;
size_t len;
/* no need to print the trailing zero, so use only len */
len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
if (__ppos >= len)
return 0;
if (count > len - __ppos)
count = len - __ppos;
if (copy_to_user(buf, __buf + __ppos, count))
return -EFAULT;
*ppos = __ppos + count;
return count;
}
static ssize_t seccomp_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
struct task_struct *tsk = proc_task(file->f_dentry->d_inode);
char __buf[20], *end;
unsigned int seccomp_mode;
/* can set it only once to be even more secure */
if (unlikely(tsk->seccomp.mode))
return -EPERM;
memset(__buf, 0, sizeof(__buf));
count = min(count, sizeof(__buf) - 1);
if (copy_from_user(__buf, buf, count))
return -EFAULT;
seccomp_mode = simple_strtoul(__buf, &end, 0);
if (*end == '\n')
end++;
if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
tsk->seccomp.mode = seccomp_mode;
set_tsk_thread_flag(tsk, TIF_SECCOMP);
} else
return -EINVAL;
if (unlikely(!(end - __buf)))
return -EIO;
return end - __buf;
}
static struct file_operations proc_seccomp_operations = {
.read = seccomp_read,
.write = seccomp_write,
};
#endif /* CONFIG_SECCOMP */
static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
......@@ -1443,6 +1511,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
inode->i_op = &proc_mem_inode_operations;
inode->i_fop = &proc_mem_operations;
break;
#ifdef CONFIG_SECCOMP
case PROC_TID_SECCOMP:
case PROC_TGID_SECCOMP:
inode->i_fop = &proc_seccomp_operations;
break;
#endif /* CONFIG_SECCOMP */
case PROC_TID_MOUNTS:
case PROC_TGID_MOUNTS:
inode->i_fop = &proc_mounts_operations;
......
......@@ -140,6 +140,7 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__;
#define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */
#define TIF_IRET 5 /* return with iret */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_MEMDIE 17
......@@ -150,12 +151,14 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__;
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
#define _TIF_IRET (1<<TIF_IRET)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK \
(0x0000FFFF & ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP))
#define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */
(0x0000FFFF & ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|_TIF_SECCOMP))
/* work to do on any return to u-space */
#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
/*
* Thread-synchronous status.
......
......@@ -102,6 +102,7 @@ static inline struct thread_info *stack_thread_info(void)
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
#define TIF_IRET 5 /* force IRET */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_IA32 17 /* 32bit process */
#define TIF_FORK 18 /* ret_from_fork */
......@@ -115,6 +116,7 @@ static inline struct thread_info *stack_thread_info(void)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_IRET (1<<TIF_IRET)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_IA32 (1<<TIF_IA32)
#define _TIF_FORK (1<<TIF_FORK)
......@@ -122,9 +124,9 @@ static inline struct thread_info *stack_thread_info(void)
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK \
(0x0000FFFF & ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP))
(0x0000FFFF & ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP|_TIF_SECCOMP))
/* work to do on any return to user space */
#define _TIF_ALLWORK_MASK 0x0000FFFF
#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP)
#define PREEMPT_ACTIVE 0x10000000
......
......@@ -32,6 +32,7 @@
#include <linux/pid.h>
#include <linux/percpu.h>
#include <linux/topology.h>
#include <linux/seccomp.h>
struct exec_domain;
......@@ -643,6 +644,7 @@ struct task_struct {
void *security;
struct audit_context *audit_context;
seccomp_t seccomp;
/* Thread group tracking */
u32 parent_exec_id;
......
#ifndef _LINUX_SECCOMP_H
#define _LINUX_SECCOMP_H
#include <linux/config.h>
#ifdef CONFIG_SECCOMP
#define NR_SECCOMP_MODES 1
#include <linux/thread_info.h>
typedef struct { int mode; } seccomp_t;
extern void __secure_computing(int);
static inline void secure_computing(int this_syscall)
{
if (unlikely(test_thread_flag(TIF_SECCOMP)))
__secure_computing(this_syscall);
}
#else /* CONFIG_SECCOMP */
#if (__GNUC__ > 2)
typedef struct { } seccomp_t;
#else
typedef struct { int gcc_is_buggy; } seccomp_t;
#endif
#define secure_computing(x) do { } while (0)
#endif /* CONFIG_SECCOMP */
#endif /* _LINUX_SECCOMP_H */
......@@ -26,6 +26,7 @@ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_SYSFS) += ksysfs.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
ifneq ($(CONFIG_IA64),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
......
/*
* linux/kernel/seccomp.c
*
* Copyright 2004-2005 Andrea Arcangeli <andrea@cpushare.com>
*
* This defines a simple but solid secure-computing mode.
*/
#include <linux/seccomp.h>
#include <linux/sched.h>
#include <asm/unistd.h>
#ifdef TIF_IA32
#include <asm/ia32_unistd.h>
#endif
/* #define SECCOMP_DEBUG 1 */
/*
* Secure computing mode 1 allows only read/write/exit/sigreturn.
* To be fully secure this must be combined with rlimit
* to limit the stack allocations too.
*/
static int mode1_syscalls[] = {
__NR_read, __NR_write, __NR_exit,
/*
* Allow either sigreturn or rt_sigreturn, newer archs
* like x86-64 only defines __NR_rt_sigreturn.
*/
#ifdef __NR_sigreturn
__NR_sigreturn,
#else
__NR_rt_sigreturn,
#endif
0, /* null terminated */
};
#ifdef TIF_IA32
static int mode1_syscalls_32bit[] = {
__NR_ia32_read, __NR_ia32_write, __NR_ia32_exit,
/*
* Allow either sigreturn or rt_sigreturn, newer archs
* like x86-64 only defines __NR_rt_sigreturn.
*/
__NR_ia32_sigreturn,
0, /* null terminated */
};
#endif
void __secure_computing(int this_syscall)
{
int mode = current->seccomp.mode;
int * syscall;
switch (mode) {
case 1:
syscall = mode1_syscalls;
#ifdef TIF_IA32
if (test_thread_flag(TIF_IA32))
syscall = mode1_syscalls_32bit;
#endif
do {
if (*syscall == this_syscall)
return;
} while (*++syscall);
break;
default:
BUG();
}
#ifdef SECCOMP_DEBUG
dump_stack();
#endif
do_exit(SIGKILL);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment