Commit 412ac77a authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull namespace updates from Eric Biederman:
 "After a lot of discussion and work we have finally reachanged a basic
  understanding of what is necessary to make unprivileged mounts safe in
  the presence of EVM and IMA xattrs which the last commit in this
  series reflects. While technically it is a revert the comments it adds
  are important for people not getting confused in the future. Clearing
  up that confusion allows us to seriously work on unprivileged mounts
  of fuse in the next development cycle.

  The rest of the fixes in this set are in the intersection of user
  namespaces, ptrace, and exec. I started with the first fix which
  started a feedback cycle of finding additional issues during review
  and fixing them. Culiminating in a fix for a bug that has been present
  since at least Linux v1.0.

  Potentially these fixes were candidates for being merged during the rc
  cycle, and are certainly backport candidates but enough little things
  turned up during review and testing that I decided they should be
  handled as part of the normal development process just to be certain
  there were not any great surprises when it came time to backport some
  of these fixes"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  Revert "evm: Translate user/group ids relative to s_user_ns when computing HMAC"
  exec: Ensure mm->user_ns contains the execed files
  ptrace: Don't allow accessing an undumpable mm
  ptrace: Capture the ptracer's creds not PT_PTRACE_CAP
  mm: Add a user_ns owner to mm_struct and fix ptrace permission checks
parents dcdaa2f9 19339c25
......@@ -283,7 +283,7 @@ long arch_ptrace(struct task_struct *child, long request,
/* When I and D space are separate, these will need to be fixed. */
case PTRACE_PEEKTEXT: /* read word at location addr. */
case PTRACE_PEEKDATA:
copied = access_process_vm(child, addr, &tmp, sizeof(tmp),
copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp),
FOLL_FORCE);
ret = -EIO;
if (copied != sizeof(tmp))
......
......@@ -270,7 +270,7 @@ long arch_ptrace(struct task_struct *child, long request,
switch (bfin_mem_access_type(addr, to_copy)) {
case BFIN_MEM_ACCESS_CORE:
case BFIN_MEM_ACCESS_CORE_ONLY:
copied = access_process_vm(child, addr, &tmp,
copied = ptrace_access_vm(child, addr, &tmp,
to_copy, FOLL_FORCE);
if (copied)
break;
......@@ -323,7 +323,7 @@ long arch_ptrace(struct task_struct *child, long request,
switch (bfin_mem_access_type(addr, to_copy)) {
case BFIN_MEM_ACCESS_CORE:
case BFIN_MEM_ACCESS_CORE_ONLY:
copied = access_process_vm(child, addr, &data,
copied = ptrace_access_vm(child, addr, &data,
to_copy,
FOLL_FORCE | FOLL_WRITE);
break;
......
......@@ -147,7 +147,7 @@ long arch_ptrace(struct task_struct *child, long request,
/* The trampoline page is globally mapped, no page table to traverse.*/
tmp = *(unsigned long*)addr;
} else {
copied = access_process_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE);
copied = ptrace_access_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE);
if (copied != sizeof(tmp))
break;
......
......@@ -1159,7 +1159,7 @@ arch_ptrace (struct task_struct *child, long request,
case PTRACE_PEEKTEXT:
case PTRACE_PEEKDATA:
/* read word at location addr */
if (access_process_vm(child, addr, &data, sizeof(data),
if (ptrace_access_vm(child, addr, &data, sizeof(data),
FOLL_FORCE)
!= sizeof(data))
return -EIO;
......
......@@ -69,7 +69,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if (get_user(addrOthers, (u32 __user * __user *) (unsigned long) addr) != 0)
break;
copied = access_process_vm(child, (u64)addrOthers, &tmp,
copied = ptrace_access_vm(child, (u64)addrOthers, &tmp,
sizeof(tmp), FOLL_FORCE);
if (copied != sizeof(tmp))
break;
......@@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if (get_user(addrOthers, (u32 __user * __user *) (unsigned long) addr) != 0)
break;
ret = 0;
if (access_process_vm(child, (u64)addrOthers, &data,
if (ptrace_access_vm(child, (u64)addrOthers, &data,
sizeof(data),
FOLL_FORCE | FOLL_WRITE) == sizeof(data))
break;
......
......@@ -73,7 +73,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if (get_user(addrOthers, (u32 __user * __user *)addr) != 0)
break;
copied = access_process_vm(child, (u64)addrOthers, &tmp,
copied = ptrace_access_vm(child, (u64)addrOthers, &tmp,
sizeof(tmp), FOLL_FORCE);
if (copied != sizeof(tmp))
break;
......@@ -178,7 +178,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if (get_user(addrOthers, (u32 __user * __user *)addr) != 0)
break;
ret = 0;
if (access_process_vm(child, (u64)addrOthers, &tmp,
if (ptrace_access_vm(child, (u64)addrOthers, &tmp,
sizeof(tmp),
FOLL_FORCE | FOLL_WRITE) == sizeof(tmp))
break;
......
......@@ -1277,8 +1277,22 @@ EXPORT_SYMBOL(flush_old_exec);
void would_dump(struct linux_binprm *bprm, struct file *file)
{
if (inode_permission(file_inode(file), MAY_READ) < 0)
struct inode *inode = file_inode(file);
if (inode_permission(inode, MAY_READ) < 0) {
struct user_namespace *old, *user_ns;
bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
/* Ensure mm->user_ns contains the executable */
user_ns = old = bprm->mm->user_ns;
while ((user_ns != &init_user_ns) &&
!privileged_wrt_inode_uidgid(user_ns, inode))
user_ns = user_ns->parent;
if (old != user_ns) {
bprm->mm->user_ns = get_user_ns(user_ns);
put_user_ns(old);
}
}
}
EXPORT_SYMBOL(would_dump);
......@@ -1308,7 +1322,6 @@ void setup_new_exec(struct linux_binprm * bprm)
!gid_eq(bprm->cred->gid, current_egid())) {
current->pdeath_signal = 0;
} else {
would_dump(bprm, bprm->file);
if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
set_dumpable(current->mm, suid_dumpable);
}
......@@ -1408,7 +1421,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
unsigned n_fs;
if (p->ptrace) {
if (p->ptrace & PT_PTRACE_CAP)
if (ptracer_capable(p, current_user_ns()))
bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP;
else
bprm->unsafe |= LSM_UNSAFE_PTRACE;
......@@ -1743,6 +1756,8 @@ static int do_execveat_common(int fd, struct filename *filename,
if (retval < 0)
goto out;
would_dump(bprm, bprm->file);
retval = exec_binprm(bprm);
if (retval < 0)
goto out;
......
......@@ -240,8 +240,10 @@ static inline bool ns_capable_noaudit(struct user_namespace *ns, int cap)
return true;
}
#endif /* CONFIG_MULTIUSER */
extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode);
extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns);
/* audit system wants to get cap info from files as well */
extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps);
......
......@@ -1270,6 +1270,8 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *
unsigned int gup_flags);
extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
unsigned long addr, void *buf, int len, unsigned int gup_flags);
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
......
......@@ -473,6 +473,7 @@ struct mm_struct {
*/
struct task_struct __rcu *owner;
#endif
struct user_namespace *user_ns;
/* store ref to file /proc/<pid>/exe symlink points to */
struct file __rcu *exe_file;
......
......@@ -8,6 +8,9 @@
#include <linux/pid_namespace.h> /* For task_active_pid_ns. */
#include <uapi/linux/ptrace.h>
extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
/*
* Ptrace flags
*
......@@ -19,7 +22,6 @@
#define PT_SEIZED 0x00010000 /* SEIZE used, enable new behavior */
#define PT_PTRACED 0x00000001
#define PT_DTRACE 0x00000002 /* delayed trace (used on m68k, i386) */
#define PT_PTRACE_CAP 0x00000004 /* ptracer can follow suid-exec */
#define PT_OPT_FLAG_SHIFT 3
/* PT_TRACE_* event enable flags */
......
......@@ -1685,6 +1685,7 @@ struct task_struct {
struct list_head cpu_timers[3];
/* process credentials */
const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
const struct cred __rcu *real_cred; /* objective and real subjective task
* credentials (COW) */
const struct cred __rcu *cred; /* effective (overridable) subjective task
......
......@@ -456,6 +456,19 @@ bool file_ns_capable(const struct file *file, struct user_namespace *ns,
}
EXPORT_SYMBOL(file_ns_capable);
/**
* privileged_wrt_inode_uidgid - Do capabilities in the namespace work over the inode?
* @ns: The user namespace in question
* @inode: The inode in question
*
* Return true if the inode uid and gid are within the namespace.
*/
bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode)
{
return kuid_has_mapping(ns, inode->i_uid) &&
kgid_has_mapping(ns, inode->i_gid);
}
/**
* capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped
* @inode: The inode in question
......@@ -469,7 +482,26 @@ bool capable_wrt_inode_uidgid(const struct inode *inode, int cap)
{
struct user_namespace *ns = current_user_ns();
return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) &&
kgid_has_mapping(ns, inode->i_gid);
return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode);
}
EXPORT_SYMBOL(capable_wrt_inode_uidgid);
/**
* ptracer_capable - Determine if the ptracer holds CAP_SYS_PTRACE in the namespace
* @tsk: The task that may be ptraced
* @ns: The user namespace to search for CAP_SYS_PTRACE in
*
* Return true if the task that is ptracing the current task had CAP_SYS_PTRACE
* in the specified user namespace.
*/
bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns)
{
int ret = 0; /* An absent tracer adds no restrictions */
const struct cred *cred;
rcu_read_lock();
cred = rcu_dereference(tsk->ptracer_cred);
if (cred)
ret = security_capable_noaudit(cred, ns, CAP_SYS_PTRACE);
rcu_read_unlock();
return (ret == 0);
}
......@@ -747,7 +747,8 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
#endif
}
static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
struct user_namespace *user_ns)
{
mm->mmap = NULL;
mm->mm_rb = RB_ROOT;
......@@ -787,6 +788,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
if (init_new_context(p, mm))
goto fail_nocontext;
mm->user_ns = get_user_ns(user_ns);
return mm;
fail_nocontext:
......@@ -832,7 +834,7 @@ struct mm_struct *mm_alloc(void)
return NULL;
memset(mm, 0, sizeof(*mm));
return mm_init(mm, current);
return mm_init(mm, current, current_user_ns());
}
/*
......@@ -847,6 +849,7 @@ void __mmdrop(struct mm_struct *mm)
destroy_context(mm);
mmu_notifier_mm_destroy(mm);
check_mm(mm);
put_user_ns(mm->user_ns);
free_mm(mm);
}
EXPORT_SYMBOL_GPL(__mmdrop);
......@@ -1128,7 +1131,7 @@ static struct mm_struct *dup_mm(struct task_struct *tsk)
memcpy(mm, oldmm, sizeof(*mm));
if (!mm_init(mm, tsk))
if (!mm_init(mm, tsk, mm->user_ns))
goto fail_nomem;
err = dup_mmap(mm, oldmm);
......
......@@ -27,6 +27,35 @@
#include <linux/cn_proc.h>
#include <linux/compat.h>
/*
* Access another process' address space via ptrace.
* Source/target buffer must be kernel space,
* Do not walk the page table directly, use get_user_pages
*/
int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
void *buf, int len, unsigned int gup_flags)
{
struct mm_struct *mm;
int ret;
mm = get_task_mm(tsk);
if (!mm)
return 0;
if (!tsk->ptrace ||
(current != tsk->parent) ||
((get_dumpable(mm) != SUID_DUMP_USER) &&
!ptracer_capable(tsk, mm->user_ns))) {
mmput(mm);
return 0;
}
ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags);
mmput(mm);
return ret;
}
/*
* ptrace a task: make the debugger its new parent and
......@@ -39,6 +68,9 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
BUG_ON(!list_empty(&child->ptrace_entry));
list_add(&child->ptrace_entry, &new_parent->ptraced);
child->parent = new_parent;
rcu_read_lock();
child->ptracer_cred = get_cred(__task_cred(new_parent));
rcu_read_unlock();
}
/**
......@@ -71,12 +103,16 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
*/
void __ptrace_unlink(struct task_struct *child)
{
const struct cred *old_cred;
BUG_ON(!child->ptrace);
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
child->parent = child->real_parent;
list_del_init(&child->ptrace_entry);
old_cred = child->ptracer_cred;
child->ptracer_cred = NULL;
put_cred(old_cred);
spin_lock(&child->sighand->siglock);
child->ptrace = 0;
......@@ -220,7 +256,7 @@ static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
{
const struct cred *cred = current_cred(), *tcred;
int dumpable = 0;
struct mm_struct *mm;
kuid_t caller_uid;
kgid_t caller_gid;
......@@ -271,16 +307,11 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
return -EPERM;
ok:
rcu_read_unlock();
smp_rmb();
if (task->mm)
dumpable = get_dumpable(task->mm);
rcu_read_lock();
if (dumpable != SUID_DUMP_USER &&
!ptrace_has_cap(__task_cred(task)->user_ns, mode)) {
rcu_read_unlock();
return -EPERM;
}
rcu_read_unlock();
mm = task->mm;
if (mm &&
((get_dumpable(mm) != SUID_DUMP_USER) &&
!ptrace_has_cap(mm->user_ns, mode)))
return -EPERM;
return security_ptrace_access_check(task, mode);
}
......@@ -344,10 +375,6 @@ static int ptrace_attach(struct task_struct *task, long request,
if (seize)
flags |= PT_SEIZED;
rcu_read_lock();
if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE))
flags |= PT_PTRACE_CAP;
rcu_read_unlock();
task->ptrace = flags;
__ptrace_link(task, current);
......@@ -537,7 +564,8 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst
int this_len, retval;
this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
retval = access_process_vm(tsk, src, buf, this_len, FOLL_FORCE);
retval = ptrace_access_vm(tsk, src, buf, this_len, FOLL_FORCE);
if (!retval) {
if (copied)
break;
......@@ -564,7 +592,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
this_len = (len > sizeof(buf)) ? sizeof(buf) : len;
if (copy_from_user(buf, src, this_len))
return -EFAULT;
retval = access_process_vm(tsk, dst, buf, this_len,
retval = ptrace_access_vm(tsk, dst, buf, this_len,
FOLL_FORCE | FOLL_WRITE);
if (!retval) {
if (copied)
......@@ -1128,7 +1156,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr,
unsigned long tmp;
int copied;
copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE);
copied = ptrace_access_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE);
if (copied != sizeof(tmp))
return -EIO;
return put_user(tmp, (unsigned long __user *)data);
......@@ -1139,7 +1167,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr,
{
int copied;
copied = access_process_vm(tsk, addr, &data, sizeof(data),
copied = ptrace_access_vm(tsk, addr, &data, sizeof(data),
FOLL_FORCE | FOLL_WRITE);
return (copied == sizeof(data)) ? 0 : -EIO;
}
......@@ -1157,7 +1185,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
switch (request) {
case PTRACE_PEEKTEXT:
case PTRACE_PEEKDATA:
ret = access_process_vm(child, addr, &word, sizeof(word),
ret = ptrace_access_vm(child, addr, &word, sizeof(word),
FOLL_FORCE);
if (ret != sizeof(word))
ret = -EIO;
......@@ -1167,7 +1195,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
case PTRACE_POKETEXT:
case PTRACE_POKEDATA:
ret = access_process_vm(child, addr, &data, sizeof(data),
ret = ptrace_access_vm(child, addr, &data, sizeof(data),
FOLL_FORCE | FOLL_WRITE);
ret = (ret != sizeof(data) ? -EIO : 0);
break;
......
......@@ -6,6 +6,7 @@
#include <linux/cpumask.h>
#include <linux/atomic.h>
#include <linux/user_namespace.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
......@@ -21,5 +22,6 @@ struct mm_struct init_mm = {
.mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
.page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
.user_ns = &init_user_ns,
INIT_MM_CONTEXT(init_mm)
};
......@@ -3904,7 +3904,7 @@ EXPORT_SYMBOL_GPL(generic_access_phys);
* Access another process' address space as given in mm. If non-NULL, use the
* given task for page fault accounting.
*/
static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
unsigned long addr, void *buf, int len, unsigned int gup_flags)
{
struct vm_area_struct *vma;
......
......@@ -1808,7 +1808,7 @@ void filemap_map_pages(struct fault_env *fe,
}
EXPORT_SYMBOL(filemap_map_pages);
static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
unsigned long addr, void *buf, int len, unsigned int gup_flags)
{
struct vm_area_struct *vma;
......
......@@ -151,8 +151,16 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode,
memset(&hmac_misc, 0, sizeof(hmac_misc));
hmac_misc.ino = inode->i_ino;
hmac_misc.generation = inode->i_generation;
hmac_misc.uid = from_kuid(inode->i_sb->s_user_ns, inode->i_uid);
hmac_misc.gid = from_kgid(inode->i_sb->s_user_ns, inode->i_gid);
/* The hmac uid and gid must be encoded in the initial user
* namespace (not the filesystems user namespace) as encoding
* them in the filesystems user namespace allows an attack
* where first they are written in an unprivileged fuse mount
* of a filesystem and then the system is tricked to mount the
* filesystem for real on next boot and trust it because
* everything is signed.
*/
hmac_misc.uid = from_kuid(&init_user_ns, inode->i_uid);
hmac_misc.gid = from_kgid(&init_user_ns, inode->i_gid);
hmac_misc.mode = inode->i_mode;
crypto_shash_update(desc, (const u8 *)&hmac_misc, sizeof(hmac_misc));
if (evm_hmac_attrs & EVM_ATTR_FSUUID)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment