Commit 0cbee992 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull user namespace updates from Eric Biederman:
 "Long ago and far away when user namespaces were young it was realized
  that allowing fresh mounts of proc and sysfs with only user namespace
  permissions could violate the basic rule that only root gets to decide
  if proc or sysfs should be mounted at all.

  Some hacks were put in place to reduce the worst of the damage that could
  be done, and the common sense rule was adopted that fresh mounts of
  proc and sysfs should allow no more than bind mounts of proc and
  sysfs.  Unfortunately that rule has not been fully enforced.

  There are two kinds of gaps in that enforcement.  Only filesystems
  mounted on empty directories of proc and sysfs should be ignored but
  the test for empty directories was insufficient.  So in my tree
  directories on proc, sysctl and sysfs that will always be empty are
  created specially.  Every other technique is imperfect as an ordinary
  directory can have entries added even after a readdir returns and
  shows that the directory is empty.  Special creation of directories
  for mount points makes the code in the kernel a smidge clearer about
  its purpose.  I asked container developers from the various container
  projects to help test this and no holes were found in the set of mount
  points on proc and sysfs that are created specially.

  This set of changes also starts enforcing that the mount flags of fresh
  mounts of proc and sysfs are consistent with the existing mount of
  proc and sysfs.  I expected this to be the boring part of the work but
  unfortunately unprivileged userspace winds up mounting fresh copies of
  proc and sysfs with noexec and nosuid clear when root set those flags
  on the previous mount of proc and sysfs.  So for now only the atime,
  read-only and nodev attributes which userspace happens to keep
  consistent are enforced.  Dealing with the noexec and nosuid
  attributes remains for another time.

  This set of changes also addresses an issue with how open file
  descriptors from /proc/<pid>/ns/* are displayed.  Recently readlink of
  /proc/<pid>/fd has been triggering a WARN_ON that has not been
  meaningful since it was added (as all of the code in the kernel was
  converted) and is now actively wrong.

  There is also a short list of issues that have not been fixed yet that
  I will mention briefly.

  It is possible to rename a directory from below to above a bind mount.
  At which point any directory pointers below the renamed directory can
  be walked up to the root directory of the filesystem.  With user
  namespaces enabled a bind mount of the bind mount can be created
  allowing the user to pick a directory whose children they can rename
  to outside of the bind mount.  This is challenging to fix and doubly
  so because all obvious solutions must touch code that is in the
  performance part of pathname resolution.

  As mentioned above there is also a question of how to ensure that
  developers by accident or with purpose do not introduce executable
  files on sysfs and proc and in doing so introduce security regressions
  in the current userspace that will not be immediately obvious and as
  such are likely to require breaking userspace in painful ways once
  they are recognized"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  vfs: Remove incorrect debugging WARN in prepend_path
  mnt: Update fs_fully_visible to test for permanently empty directories
  sysfs: Create mountpoints with sysfs_create_mount_point
  sysfs: Add support for permanently empty directories to serve as mount points.
  kernfs: Add support for always empty directories.
  proc: Allow creating permanently empty directories that serve as mount points
  sysctl: Allow creating permanently empty directories that serve as mountpoints.
  fs: Add helper functions for permanently empty directories.
  vfs: Ignore unlocked mounts in fs_fully_visible
  mnt: Modify fs_fully_visible to deal with locked ro nodev and atime
  mnt: Refactor the logic for mounting sysfs and proc in a user namespace
parents 2fee94b7 93e3bce6
...@@ -456,8 +456,6 @@ static const struct super_operations hypfs_s_ops = { ...@@ -456,8 +456,6 @@ static const struct super_operations hypfs_s_ops = {
.show_options = hypfs_show_options, .show_options = hypfs_show_options,
}; };
static struct kobject *s390_kobj;
static int __init hypfs_init(void) static int __init hypfs_init(void)
{ {
int rc; int rc;
...@@ -481,18 +479,16 @@ static int __init hypfs_init(void) ...@@ -481,18 +479,16 @@ static int __init hypfs_init(void)
rc = -ENODATA; rc = -ENODATA;
goto fail_hypfs_sprp_exit; goto fail_hypfs_sprp_exit;
} }
s390_kobj = kobject_create_and_add("s390", hypervisor_kobj); rc = sysfs_create_mount_point(hypervisor_kobj, "s390");
if (!s390_kobj) { if (rc)
rc = -ENOMEM;
goto fail_hypfs_diag0c_exit; goto fail_hypfs_diag0c_exit;
}
rc = register_filesystem(&hypfs_type); rc = register_filesystem(&hypfs_type);
if (rc) if (rc)
goto fail_filesystem; goto fail_filesystem;
return 0; return 0;
fail_filesystem: fail_filesystem:
kobject_put(s390_kobj); sysfs_remove_mount_point(hypervisor_kobj, "s390");
fail_hypfs_diag0c_exit: fail_hypfs_diag0c_exit:
hypfs_diag0c_exit(); hypfs_diag0c_exit();
fail_hypfs_sprp_exit: fail_hypfs_sprp_exit:
...@@ -510,7 +506,7 @@ static int __init hypfs_init(void) ...@@ -510,7 +506,7 @@ static int __init hypfs_init(void)
static void __exit hypfs_exit(void) static void __exit hypfs_exit(void)
{ {
unregister_filesystem(&hypfs_type); unregister_filesystem(&hypfs_type);
kobject_put(s390_kobj); sysfs_remove_mount_point(hypervisor_kobj, "s390");
hypfs_diag0c_exit(); hypfs_diag0c_exit();
hypfs_sprp_exit(); hypfs_sprp_exit();
hypfs_vm_exit(); hypfs_vm_exit();
......
...@@ -66,7 +66,6 @@ static int __init parse_efi_cmdline(char *str) ...@@ -66,7 +66,6 @@ static int __init parse_efi_cmdline(char *str)
early_param("efi", parse_efi_cmdline); early_param("efi", parse_efi_cmdline);
struct kobject *efi_kobj; struct kobject *efi_kobj;
static struct kobject *efivars_kobj;
/* /*
* Let's not leave out systab information that snuck into * Let's not leave out systab information that snuck into
...@@ -218,10 +217,9 @@ static int __init efisubsys_init(void) ...@@ -218,10 +217,9 @@ static int __init efisubsys_init(void)
goto err_remove_group; goto err_remove_group;
/* and the standard mountpoint for efivarfs */ /* and the standard mountpoint for efivarfs */
efivars_kobj = kobject_create_and_add("efivars", efi_kobj); error = sysfs_create_mount_point(efi_kobj, "efivars");
if (!efivars_kobj) { if (error) {
pr_err("efivars: Subsystem registration failed.\n"); pr_err("efivars: Subsystem registration failed.\n");
error = -ENOMEM;
goto err_remove_group; goto err_remove_group;
} }
......
...@@ -129,8 +129,6 @@ void configfs_release_fs(void) ...@@ -129,8 +129,6 @@ void configfs_release_fs(void)
} }
static struct kobject *config_kobj;
static int __init configfs_init(void) static int __init configfs_init(void)
{ {
int err = -ENOMEM; int err = -ENOMEM;
...@@ -141,8 +139,8 @@ static int __init configfs_init(void) ...@@ -141,8 +139,8 @@ static int __init configfs_init(void)
if (!configfs_dir_cachep) if (!configfs_dir_cachep)
goto out; goto out;
config_kobj = kobject_create_and_add("config", kernel_kobj); err = sysfs_create_mount_point(kernel_kobj, "config");
if (!config_kobj) if (err)
goto out2; goto out2;
err = register_filesystem(&configfs_fs_type); err = register_filesystem(&configfs_fs_type);
...@@ -152,7 +150,7 @@ static int __init configfs_init(void) ...@@ -152,7 +150,7 @@ static int __init configfs_init(void)
return 0; return 0;
out3: out3:
pr_err("Unable to register filesystem!\n"); pr_err("Unable to register filesystem!\n");
kobject_put(config_kobj); sysfs_remove_mount_point(kernel_kobj, "config");
out2: out2:
kmem_cache_destroy(configfs_dir_cachep); kmem_cache_destroy(configfs_dir_cachep);
configfs_dir_cachep = NULL; configfs_dir_cachep = NULL;
...@@ -163,7 +161,7 @@ static int __init configfs_init(void) ...@@ -163,7 +161,7 @@ static int __init configfs_init(void)
static void __exit configfs_exit(void) static void __exit configfs_exit(void)
{ {
unregister_filesystem(&configfs_fs_type); unregister_filesystem(&configfs_fs_type);
kobject_put(config_kobj); sysfs_remove_mount_point(kernel_kobj, "config");
kmem_cache_destroy(configfs_dir_cachep); kmem_cache_destroy(configfs_dir_cachep);
configfs_dir_cachep = NULL; configfs_dir_cachep = NULL;
} }
......
...@@ -2927,17 +2927,6 @@ static int prepend_path(const struct path *path, ...@@ -2927,17 +2927,6 @@ static int prepend_path(const struct path *path,
vfsmnt = &mnt->mnt; vfsmnt = &mnt->mnt;
continue; continue;
} }
/*
* Filesystems needing to implement special "root names"
* should do so with ->d_dname()
*/
if (IS_ROOT(dentry) &&
(dentry->d_name.len != 1 ||
dentry->d_name.name[0] != '/')) {
WARN(1, "Root dentry has weird name <%.*s>\n",
(int) dentry->d_name.len,
dentry->d_name.name);
}
if (!error) if (!error)
error = is_mounted(vfsmnt) ? 1 : 2; error = is_mounted(vfsmnt) ? 1 : 2;
break; break;
......
...@@ -716,20 +716,17 @@ bool debugfs_initialized(void) ...@@ -716,20 +716,17 @@ bool debugfs_initialized(void)
} }
EXPORT_SYMBOL_GPL(debugfs_initialized); EXPORT_SYMBOL_GPL(debugfs_initialized);
static struct kobject *debug_kobj;
static int __init debugfs_init(void) static int __init debugfs_init(void)
{ {
int retval; int retval;
debug_kobj = kobject_create_and_add("debug", kernel_kobj); retval = sysfs_create_mount_point(kernel_kobj, "debug");
if (!debug_kobj) if (retval)
return -EINVAL; return retval;
retval = register_filesystem(&debug_fs_type); retval = register_filesystem(&debug_fs_type);
if (retval) if (retval)
kobject_put(debug_kobj); sysfs_remove_mount_point(kernel_kobj, "debug");
else else
debugfs_registered = true; debugfs_registered = true;
......
...@@ -1294,7 +1294,6 @@ static void fuse_fs_cleanup(void) ...@@ -1294,7 +1294,6 @@ static void fuse_fs_cleanup(void)
} }
static struct kobject *fuse_kobj; static struct kobject *fuse_kobj;
static struct kobject *connections_kobj;
static int fuse_sysfs_init(void) static int fuse_sysfs_init(void)
{ {
...@@ -1306,11 +1305,9 @@ static int fuse_sysfs_init(void) ...@@ -1306,11 +1305,9 @@ static int fuse_sysfs_init(void)
goto out_err; goto out_err;
} }
connections_kobj = kobject_create_and_add("connections", fuse_kobj); err = sysfs_create_mount_point(fuse_kobj, "connections");
if (!connections_kobj) { if (err)
err = -ENOMEM;
goto out_fuse_unregister; goto out_fuse_unregister;
}
return 0; return 0;
...@@ -1322,7 +1319,7 @@ static int fuse_sysfs_init(void) ...@@ -1322,7 +1319,7 @@ static int fuse_sysfs_init(void)
static void fuse_sysfs_cleanup(void) static void fuse_sysfs_cleanup(void)
{ {
kobject_put(connections_kobj); sysfs_remove_mount_point(fuse_kobj, "connections");
kobject_put(fuse_kobj); kobject_put(fuse_kobj);
} }
......
...@@ -592,6 +592,9 @@ int kernfs_add_one(struct kernfs_node *kn) ...@@ -592,6 +592,9 @@ int kernfs_add_one(struct kernfs_node *kn)
goto out_unlock; goto out_unlock;
ret = -ENOENT; ret = -ENOENT;
if (parent->flags & KERNFS_EMPTY_DIR)
goto out_unlock;
if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent)) if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
goto out_unlock; goto out_unlock;
...@@ -783,6 +786,38 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, ...@@ -783,6 +786,38 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
return ERR_PTR(rc); return ERR_PTR(rc);
} }
/**
 * kernfs_create_empty_dir - create an always empty directory
 * @parent: parent in which to create a new directory
 * @name: name of the new directory
 *
 * The new node is a read/search-only directory flagged KERNFS_EMPTY_DIR,
 * with no namespace tag and no private data attached.
 *
 * Returns the created node on success, ERR_PTR() value on failure.
 */
struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
					    const char *name)
{
	struct kernfs_node *node;
	int err;

	node = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, KERNFS_DIR);
	if (node == NULL)
		return ERR_PTR(-ENOMEM);

	/* Flag the node before it becomes reachable through the tree. */
	node->flags |= KERNFS_EMPTY_DIR;
	node->dir.root = parent->dir.root;
	node->ns = NULL;
	node->priv = NULL;

	/* Link it under @parent; drop our reference if that fails. */
	err = kernfs_add_one(node);
	if (err) {
		kernfs_put(node);
		return ERR_PTR(err);
	}

	return node;
}
static struct dentry *kernfs_iop_lookup(struct inode *dir, static struct dentry *kernfs_iop_lookup(struct inode *dir,
struct dentry *dentry, struct dentry *dentry,
unsigned int flags) unsigned int flags)
...@@ -1254,7 +1289,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, ...@@ -1254,7 +1289,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
mutex_lock(&kernfs_mutex); mutex_lock(&kernfs_mutex);
error = -ENOENT; error = -ENOENT;
if (!kernfs_active(kn) || !kernfs_active(new_parent)) if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
(new_parent->flags & KERNFS_EMPTY_DIR))
goto out; goto out;
error = 0; error = 0;
......
...@@ -296,6 +296,8 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode) ...@@ -296,6 +296,8 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode)
case KERNFS_DIR: case KERNFS_DIR:
inode->i_op = &kernfs_dir_iops; inode->i_op = &kernfs_dir_iops;
inode->i_fop = &kernfs_dir_fops; inode->i_fop = &kernfs_dir_fops;
if (kn->flags & KERNFS_EMPTY_DIR)
make_empty_dir_inode(inode);
break; break;
case KERNFS_FILE: case KERNFS_FILE:
inode->i_size = kn->attr.size; inode->i_size = kn->attr.size;
......
...@@ -1108,3 +1108,98 @@ const struct inode_operations simple_symlink_inode_operations = { ...@@ -1108,3 +1108,98 @@ const struct inode_operations simple_symlink_inode_operations = {
.readlink = generic_readlink .readlink = generic_readlink
}; };
EXPORT_SYMBOL(simple_symlink_inode_operations); EXPORT_SYMBOL(simple_symlink_inode_operations);
/*
 * Operations for a permanently empty directory.
 */

/* ->lookup: no name ever resolves here, so every lookup yields -ENOENT. */
static struct dentry *empty_dir_lookup(struct inode *dir,
				       struct dentry *dentry,
				       unsigned int flags)
{
	return ERR_PTR(-ENOENT);
}
/*
 * ->getattr: fill @stat from the inode behind @dentry using the generic
 * helper.  @mnt is not used.  Always returns 0.
 */
static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
			     struct kstat *stat)
{
	generic_fillattr(d_inode(dentry), stat);

	return 0;
}
/* ->setattr: attribute changes are refused unconditionally with -EPERM. */
static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr)
{
	return -EPERM;
}
/*
 * Extended-attribute callbacks.  All four ignore their arguments and
 * report -EOPNOTSUPP.
 */

/* ->setxattr: always -EOPNOTSUPP. */
static int empty_dir_setxattr(struct dentry *dentry, const char *name,
			      const void *value, size_t size, int flags)
{
	return -EOPNOTSUPP;
}

/* ->getxattr: always -EOPNOTSUPP. */
static ssize_t empty_dir_getxattr(struct dentry *dentry, const char *name,
				  void *value, size_t size)
{
	return -EOPNOTSUPP;
}

/* ->removexattr: always -EOPNOTSUPP. */
static int empty_dir_removexattr(struct dentry *dentry, const char *name)
{
	return -EOPNOTSUPP;
}

/* ->listxattr: always -EOPNOTSUPP. */
static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list,
				   size_t size)
{
	return -EOPNOTSUPP;
}
/*
 * Inode operations installed by make_empty_dir_inode().  The address of
 * this table is also what is_empty_dir_inode() compares against.
 */
static const struct inode_operations empty_dir_inode_operations = {
	.lookup		= empty_dir_lookup,
	.permission	= generic_permission,
	.setattr	= empty_dir_setattr,
	.getattr	= empty_dir_getattr,
	.setxattr	= empty_dir_setxattr,
	.getxattr	= empty_dir_getxattr,
	.removexattr	= empty_dir_removexattr,
	.listxattr	= empty_dir_listxattr,
};
/*
 * ->llseek: delegate to generic_file_llseek_size() with both of its
 * trailing limits set to 2.
 */
static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence)
{
	/* An empty directory has two entries . and .. at offsets 0 and 1 */
	const loff_t nr_entries = 2;

	return generic_file_llseek_size(file, offset, whence,
					nr_entries, nr_entries);
}
/*
 * ->iterate: emit only the dot entries via dir_emit_dots(); its result is
 * deliberately not inspected.  Always returns 0.
 */
static int empty_dir_readdir(struct file *file, struct dir_context *ctx)
{
	dir_emit_dots(file, ctx);

	return 0;
}
/*
 * File operations installed by make_empty_dir_inode().  The address of
 * this table is also what is_empty_dir_inode() compares against.
 */
static const struct file_operations empty_dir_operations = {
	.llseek		= empty_dir_llseek,
	.read		= generic_read_dir,
	.iterate	= empty_dir_readdir,
	.fsync		= noop_fsync,
};
/*
 * make_empty_dir_inode - turn @inode into a permanently empty directory
 *
 * Overwrites the inode's link count, mode, ownership, device, size and
 * block fields, and points it at the empty-directory inode and file
 * operation tables.  Afterwards is_empty_dir_inode() returns true for it.
 */
void make_empty_dir_inode(struct inode *inode)
{
	set_nlink(inode, 2);

	/* Root-owned directory, readable and searchable by everyone. */
	inode->i_uid	= GLOBAL_ROOT_UID;
	inode->i_gid	= GLOBAL_ROOT_GID;
	inode->i_mode	= S_IFDIR | S_IRUGO | S_IXUGO;

	inode->i_rdev	 = 0;
	inode->i_blocks	 = 0;
	inode->i_blkbits = PAGE_SHIFT;
	inode->i_size	 = 2;

	inode->i_fop	= &empty_dir_operations;
	inode->i_op	= &empty_dir_inode_operations;
}
bool is_empty_dir_inode(struct inode *inode)
{
return (inode->i_fop == &empty_dir_operations) &&
(inode->i_op == &empty_dir_inode_operations);
}
...@@ -2343,6 +2343,8 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) ...@@ -2343,6 +2343,8 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
return err; return err;
} }
static bool fs_fully_visible(struct file_system_type *fs_type, int *new_mnt_flags);
/* /*
* create a new mount for userspace and request it to be added into the * create a new mount for userspace and request it to be added into the
* namespace's tree * namespace's tree
...@@ -2374,6 +2376,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags, ...@@ -2374,6 +2376,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
flags |= MS_NODEV; flags |= MS_NODEV;
mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV; mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
} }
if (type->fs_flags & FS_USERNS_VISIBLE) {
if (!fs_fully_visible(type, &mnt_flags))
return -EPERM;
}
} }
mnt = vfs_kern_mount(type, flags, name, data); mnt = vfs_kern_mount(type, flags, name, data);
...@@ -3175,9 +3181,10 @@ bool current_chrooted(void) ...@@ -3175,9 +3181,10 @@ bool current_chrooted(void)
return chrooted; return chrooted;
} }
bool fs_fully_visible(struct file_system_type *type) static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
{ {
struct mnt_namespace *ns = current->nsproxy->mnt_ns; struct mnt_namespace *ns = current->nsproxy->mnt_ns;
int new_flags = *new_mnt_flags;
struct mount *mnt; struct mount *mnt;
bool visible = false; bool visible = false;
...@@ -3196,16 +3203,36 @@ bool fs_fully_visible(struct file_system_type *type) ...@@ -3196,16 +3203,36 @@ bool fs_fully_visible(struct file_system_type *type)
if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root) if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
continue; continue;
/* This mount is not fully visible if there are any child mounts /* Verify the mount flags are equal to or more permissive
* that cover anything except for empty directories. * than the proposed new mount.
*/
if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
!(new_flags & MNT_READONLY))
continue;
if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
!(new_flags & MNT_NODEV))
continue;
if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
continue;
/* This mount is not fully visible if there are any
* locked child mounts that cover anything except for
* empty directories.
*/ */
list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
struct inode *inode = child->mnt_mountpoint->d_inode; struct inode *inode = child->mnt_mountpoint->d_inode;
if (!S_ISDIR(inode->i_mode)) /* Only worry about locked mounts */
goto next; if (!(mnt->mnt.mnt_flags & MNT_LOCKED))
if (inode->i_nlink > 2) continue;
/* Is the directory permanently empty? */
if (!is_empty_dir_inode(inode))
goto next; goto next;
} }
/* Preserve the locked attributes */
*new_mnt_flags |= mnt->mnt.mnt_flags & (MNT_LOCK_READONLY | \
MNT_LOCK_NODEV | \
MNT_LOCK_ATIME);
visible = true; visible = true;
goto found; goto found;
next: ; next: ;
......
...@@ -373,6 +373,10 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, ...@@ -373,6 +373,10 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
WARN(1, "create '/proc/%s' by hand\n", qstr.name); WARN(1, "create '/proc/%s' by hand\n", qstr.name);
return NULL; return NULL;
} }
if (is_empty_pde(*parent)) {
WARN(1, "attempt to add to permanently empty directory");
return NULL;
}
ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL); ent = kzalloc(sizeof(struct proc_dir_entry) + qstr.len + 1, GFP_KERNEL);
if (!ent) if (!ent)
...@@ -455,6 +459,25 @@ struct proc_dir_entry *proc_mkdir(const char *name, ...@@ -455,6 +459,25 @@ struct proc_dir_entry *proc_mkdir(const char *name,
} }
EXPORT_SYMBOL(proc_mkdir); EXPORT_SYMBOL(proc_mkdir);
/*
 * proc_create_mount_point - create a permanently empty proc directory
 * @name: name handed to __proc_create() with a NULL parent
 *	  (callers in this file pass e.g. "fs/nfsd")
 *
 * The entry is a read/search-only directory whose data, proc_fops and
 * proc_iops are all NULL; a directory entry without proc_iops is what
 * is_empty_pde() recognises as permanently empty.
 *
 * Returns the new entry, or NULL if creation or registration failed.
 */
struct proc_dir_entry *proc_create_mount_point(const char *name)
{
	umode_t mode = S_IFDIR | S_IRUGO | S_IXUGO;
	struct proc_dir_entry *ent, *parent = NULL;

	ent = __proc_create(&parent, name, mode, 2);
	if (!ent)
		return NULL;

	ent->data = NULL;
	ent->proc_fops = NULL;
	ent->proc_iops = NULL;

	/*
	 * Count the new subdirectory in the parent's link count before
	 * registering, so that the decrement on the failure path below
	 * undoes an increment instead of underflowing the count.
	 */
	parent->nlink++;
	if (proc_register(parent, ent) < 0) {
		kfree(ent);
		parent->nlink--;
		ent = NULL;
	}

	return ent;
}
struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
struct proc_dir_entry *parent, struct proc_dir_entry *parent,
const struct file_operations *proc_fops, const struct file_operations *proc_fops,
......
...@@ -422,6 +422,10 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) ...@@ -422,6 +422,10 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
PROC_I(inode)->pde = de; PROC_I(inode)->pde = de;
if (is_empty_pde(de)) {
make_empty_dir_inode(inode);
return inode;
}
if (de->mode) { if (de->mode) {
inode->i_mode = de->mode; inode->i_mode = de->mode;
inode->i_uid = de->uid; inode->i_uid = de->uid;
......
...@@ -191,6 +191,12 @@ static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) ...@@ -191,6 +191,12 @@ static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
} }
extern void pde_put(struct proc_dir_entry *); extern void pde_put(struct proc_dir_entry *);
static inline bool is_empty_pde(const struct proc_dir_entry *pde)
{
return S_ISDIR(pde->mode) && !pde->proc_iops;
}
struct proc_dir_entry *proc_create_mount_point(const char *name);
/* /*
* inode.c * inode.c
*/ */
......
...@@ -19,6 +19,28 @@ static const struct inode_operations proc_sys_inode_operations; ...@@ -19,6 +19,28 @@ static const struct inode_operations proc_sys_inode_operations;
static const struct file_operations proc_sys_dir_file_operations; static const struct file_operations proc_sys_dir_file_operations;
static const struct inode_operations proc_sys_dir_operations; static const struct inode_operations proc_sys_dir_operations;
/* Support for permanently empty directories */

/*
 * Sentinel table holding a single zeroed entry.  Only its address is
 * meaningful: the helpers below store it in, and compare it against, the
 * ->child pointer of a directory's first ctl_table entry.
 */
struct ctl_table sysctl_mount_point[] = {
{ }
};
static bool is_empty_dir(struct ctl_table_header *head)
{
return head->ctl_table[0].child == sysctl_mount_point;
}
static void set_empty_dir(struct ctl_dir *dir)
{
dir->header.ctl_table[0].child = sysctl_mount_point;
}
static void clear_empty_dir(struct ctl_dir *dir)
{
dir->header.ctl_table[0].child = NULL;
}
void proc_sys_poll_notify(struct ctl_table_poll *poll) void proc_sys_poll_notify(struct ctl_table_poll *poll)
{ {
if (!poll) if (!poll)
...@@ -187,6 +209,17 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) ...@@ -187,6 +209,17 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header)
struct ctl_table *entry; struct ctl_table *entry;
int err; int err;
/* Is this a permanently empty directory? */
if (is_empty_dir(&dir->header))
return -EROFS;
/* Am I creating a permanently empty directory? */
if (header->ctl_table == sysctl_mount_point) {
if (!RB_EMPTY_ROOT(&dir->root))
return -EINVAL;
set_empty_dir(dir);
}
dir->header.nreg++; dir->header.nreg++;
header->parent = dir; header->parent = dir;
err = insert_links(header); err = insert_links(header);
...@@ -202,6 +235,8 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header) ...@@ -202,6 +235,8 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header)
erase_header(header); erase_header(header);
put_links(header); put_links(header);
fail_links: fail_links:
if (header->ctl_table == sysctl_mount_point)
clear_empty_dir(dir);
header->parent = NULL; header->parent = NULL;
drop_sysctl_table(&dir->header); drop_sysctl_table(&dir->header);
return err; return err;
...@@ -419,6 +454,8 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, ...@@ -419,6 +454,8 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
inode->i_mode |= S_IFDIR; inode->i_mode |= S_IFDIR;
inode->i_op = &proc_sys_dir_operations; inode->i_op = &proc_sys_dir_operations;
inode->i_fop = &proc_sys_dir_file_operations; inode->i_fop = &proc_sys_dir_file_operations;
if (is_empty_dir(head))
make_empty_dir_inode(inode);
} }
out: out:
return inode; return inode;
......
...@@ -112,9 +112,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, ...@@ -112,9 +112,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
ns = task_active_pid_ns(current); ns = task_active_pid_ns(current);
options = data; options = data;
if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
return ERR_PTR(-EPERM);
/* Does the mounter have privilege over the pid namespace? */ /* Does the mounter have privilege over the pid namespace? */
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM); return ERR_PTR(-EPERM);
...@@ -159,7 +156,7 @@ static struct file_system_type proc_fs_type = { ...@@ -159,7 +156,7 @@ static struct file_system_type proc_fs_type = {
.name = "proc", .name = "proc",
.mount = proc_mount, .mount = proc_mount,
.kill_sb = proc_kill_sb, .kill_sb = proc_kill_sb,
.fs_flags = FS_USERNS_MOUNT, .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT,
}; };
void __init proc_root_init(void) void __init proc_root_init(void)
...@@ -182,10 +179,10 @@ void __init proc_root_init(void) ...@@ -182,10 +179,10 @@ void __init proc_root_init(void)
#endif #endif
proc_mkdir("fs", NULL); proc_mkdir("fs", NULL);
proc_mkdir("driver", NULL); proc_mkdir("driver", NULL);
proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */ proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */
#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE) #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
/* just give it a mountpoint */ /* just give it a mountpoint */
proc_mkdir("openprom", NULL); proc_create_mount_point("openprom");
#endif #endif
proc_tty_init(); proc_tty_init();
proc_mkdir("bus", NULL); proc_mkdir("bus", NULL);
......
...@@ -461,22 +461,18 @@ static struct file_system_type pstore_fs_type = { ...@@ -461,22 +461,18 @@ static struct file_system_type pstore_fs_type = {
.kill_sb = pstore_kill_sb, .kill_sb = pstore_kill_sb,
}; };
static struct kobject *pstore_kobj;
static int __init init_pstore_fs(void) static int __init init_pstore_fs(void)
{ {
int err = 0; int err;
/* Create a convenient mount point for people to access pstore */ /* Create a convenient mount point for people to access pstore */
pstore_kobj = kobject_create_and_add("pstore", fs_kobj); err = sysfs_create_mount_point(fs_kobj, "pstore");
if (!pstore_kobj) { if (err)
err = -ENOMEM;
goto out; goto out;
}
err = register_filesystem(&pstore_fs_type); err = register_filesystem(&pstore_fs_type);
if (err < 0) if (err < 0)
kobject_put(pstore_kobj); sysfs_remove_mount_point(fs_kobj, "pstore");
out: out:
return err; return err;
......
...@@ -121,3 +121,37 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, ...@@ -121,3 +121,37 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj,
return kernfs_rename_ns(kn, new_parent, kn->name, new_ns); return kernfs_rename_ns(kn, new_parent, kn->name, new_ns);
} }
/**
 * sysfs_create_mount_point - create an always empty directory
 * @parent_kobj: kobject that will contain this always empty directory
 * @name: The name of the always empty directory to add
 *
 * Returns 0 on success or the negative errno carried by the ERR_PTR that
 * kernfs_create_empty_dir() returned.  An -EEXIST failure additionally
 * triggers sysfs_warn_dup().
 */
int sysfs_create_mount_point(struct kobject *parent_kobj, const char *name)
{
	struct kernfs_node *parent = parent_kobj->sd;
	struct kernfs_node *kn;
	int err;

	kn = kernfs_create_empty_dir(parent, name);
	if (!IS_ERR(kn))
		return 0;

	err = PTR_ERR(kn);
	if (err == -EEXIST)
		sysfs_warn_dup(parent, name);

	return err;
}
EXPORT_SYMBOL_GPL(sysfs_create_mount_point);
/**
 * sysfs_remove_mount_point - remove an always empty directory.
 * @parent_kobj: kobject that contains this always empty directory
 * @name: The name of the always empty directory to remove
 *
 * Removes the entry called @name from under @parent_kobj's kernfs node,
 * using a NULL namespace tag.
 */
void sysfs_remove_mount_point(struct kobject *parent_kobj, const char *name)
{
	kernfs_remove_by_name_ns(parent_kobj->sd, name, NULL);
}
EXPORT_SYMBOL_GPL(sysfs_remove_mount_point);
...@@ -31,9 +31,6 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, ...@@ -31,9 +31,6 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
bool new_sb; bool new_sb;
if (!(flags & MS_KERNMOUNT)) { if (!(flags & MS_KERNMOUNT)) {
if (!capable(CAP_SYS_ADMIN) && !fs_fully_visible(fs_type))
return ERR_PTR(-EPERM);
if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
return ERR_PTR(-EPERM); return ERR_PTR(-EPERM);
} }
...@@ -58,7 +55,7 @@ static struct file_system_type sysfs_fs_type = { ...@@ -58,7 +55,7 @@ static struct file_system_type sysfs_fs_type = {
.name = "sysfs", .name = "sysfs",
.mount = sysfs_mount, .mount = sysfs_mount,
.kill_sb = sysfs_kill_sb, .kill_sb = sysfs_kill_sb,
.fs_flags = FS_USERNS_MOUNT, .fs_flags = FS_USERNS_VISIBLE | FS_USERNS_MOUNT,
}; };
int __init sysfs_init(void) int __init sysfs_init(void)
......
...@@ -631,14 +631,12 @@ bool tracefs_initialized(void) ...@@ -631,14 +631,12 @@ bool tracefs_initialized(void)
return tracefs_registered; return tracefs_registered;
} }
static struct kobject *trace_kobj;
static int __init tracefs_init(void) static int __init tracefs_init(void)
{ {
int retval; int retval;
trace_kobj = kobject_create_and_add("tracing", kernel_kobj); retval = sysfs_create_mount_point(kernel_kobj, "tracing");
if (!trace_kobj) if (retval)
return -EINVAL; return -EINVAL;
retval = register_filesystem(&trace_fs_type); retval = register_filesystem(&trace_fs_type);
......
...@@ -1917,6 +1917,7 @@ struct file_system_type { ...@@ -1917,6 +1917,7 @@ struct file_system_type {
#define FS_HAS_SUBTYPE 4 #define FS_HAS_SUBTYPE 4
#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */ #define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */ #define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */
#define FS_USERNS_VISIBLE 32 /* FS must already be visible */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
struct dentry *(*mount) (struct file_system_type *, int, struct dentry *(*mount) (struct file_system_type *, int,
const char *, void *); const char *, void *);
...@@ -2004,7 +2005,6 @@ extern int vfs_ustat(dev_t, struct kstatfs *); ...@@ -2004,7 +2005,6 @@ extern int vfs_ustat(dev_t, struct kstatfs *);
extern int freeze_super(struct super_block *super); extern int freeze_super(struct super_block *super);
extern int thaw_super(struct super_block *super); extern int thaw_super(struct super_block *super);
extern bool our_mnt(struct vfsmount *mnt); extern bool our_mnt(struct vfsmount *mnt);
extern bool fs_fully_visible(struct file_system_type *);
extern int current_umask(void); extern int current_umask(void);
...@@ -2816,6 +2816,8 @@ extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned in ...@@ -2816,6 +2816,8 @@ extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned in
extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
extern const struct file_operations simple_dir_operations; extern const struct file_operations simple_dir_operations;
extern const struct inode_operations simple_dir_inode_operations; extern const struct inode_operations simple_dir_inode_operations;
extern void make_empty_dir_inode(struct inode *inode);
extern bool is_empty_dir_inode(struct inode *inode);
struct tree_descr { char *name; const struct file_operations *ops; int mode; }; struct tree_descr { char *name; const struct file_operations *ops; int mode; };
struct dentry *d_alloc_name(struct dentry *, const char *); struct dentry *d_alloc_name(struct dentry *, const char *);
extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *);
......
...@@ -45,6 +45,7 @@ enum kernfs_node_flag { ...@@ -45,6 +45,7 @@ enum kernfs_node_flag {
KERNFS_LOCKDEP = 0x0100, KERNFS_LOCKDEP = 0x0100,
KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDAL = 0x0400,
KERNFS_SUICIDED = 0x0800, KERNFS_SUICIDED = 0x0800,
KERNFS_EMPTY_DIR = 0x1000,
}; };
/* @flags for kernfs_create_root() */ /* @flags for kernfs_create_root() */
...@@ -286,6 +287,8 @@ void kernfs_destroy_root(struct kernfs_root *root); ...@@ -286,6 +287,8 @@ void kernfs_destroy_root(struct kernfs_root *root);
struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
const char *name, umode_t mode, const char *name, umode_t mode,
void *priv, const void *ns); void *priv, const void *ns);
struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
const char *name);
struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
const char *name, const char *name,
umode_t mode, loff_t size, umode_t mode, loff_t size,
......
...@@ -188,6 +188,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, ...@@ -188,6 +188,9 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
void unregister_sysctl_table(struct ctl_table_header * table); void unregister_sysctl_table(struct ctl_table_header * table);
extern int sysctl_init(void); extern int sysctl_init(void);
extern struct ctl_table sysctl_mount_point[];
#else /* CONFIG_SYSCTL */ #else /* CONFIG_SYSCTL */
static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
{ {
......
...@@ -210,6 +210,10 @@ int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, ...@@ -210,6 +210,10 @@ int __must_check sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name,
int __must_check sysfs_move_dir_ns(struct kobject *kobj, int __must_check sysfs_move_dir_ns(struct kobject *kobj,
struct kobject *new_parent_kobj, struct kobject *new_parent_kobj,
const void *new_ns); const void *new_ns);
int __must_check sysfs_create_mount_point(struct kobject *parent_kobj,
const char *name);
void sysfs_remove_mount_point(struct kobject *parent_kobj,
const char *name);
int __must_check sysfs_create_file_ns(struct kobject *kobj, int __must_check sysfs_create_file_ns(struct kobject *kobj,
const struct attribute *attr, const struct attribute *attr,
...@@ -298,6 +302,17 @@ static inline int sysfs_move_dir_ns(struct kobject *kobj, ...@@ -298,6 +302,17 @@ static inline int sysfs_move_dir_ns(struct kobject *kobj,
return 0; return 0;
} }
/*
 * sysfs_create_mount_point - inline stub variant
 * @parent_kobj: parent kobject (unused by this stub)
 * @name: name of the mount point directory (unused by this stub)
 *
 * Does no work and unconditionally reports success.
 * NOTE(review): presumably this is the fallback used when sysfs is
 * compiled out -- confirm against the enclosing preprocessor guard.
 *
 * Return: always 0.
 */
static inline int sysfs_create_mount_point(struct kobject *parent_kobj,
					    const char *name)
{
	return 0;
}
/*
 * sysfs_remove_mount_point - inline stub variant
 * @parent_kobj: parent kobject (unused by this stub)
 * @name: name of the mount point directory (unused by this stub)
 *
 * Deliberately empty: neither argument is touched.
 * NOTE(review): presumably this is the fallback used when sysfs is
 * compiled out -- confirm against the enclosing preprocessor guard.
 */
static inline void sysfs_remove_mount_point(struct kobject *parent_kobj,
					     const char *name)
{
	/* nothing to remove in the stub */
}
static inline int sysfs_create_file_ns(struct kobject *kobj, static inline int sysfs_create_file_ns(struct kobject *kobj,
const struct attribute *attr, const struct attribute *attr,
const void *ns) const void *ns)
......
...@@ -1939,8 +1939,6 @@ static struct file_system_type cgroup_fs_type = { ...@@ -1939,8 +1939,6 @@ static struct file_system_type cgroup_fs_type = {
.kill_sb = cgroup_kill_sb, .kill_sb = cgroup_kill_sb,
}; };
static struct kobject *cgroup_kobj;
/** /**
* task_cgroup_path - cgroup path of a task in the first cgroup hierarchy * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
* @task: target task * @task: target task
...@@ -5070,13 +5068,13 @@ int __init cgroup_init(void) ...@@ -5070,13 +5068,13 @@ int __init cgroup_init(void)
ss->bind(init_css_set.subsys[ssid]); ss->bind(init_css_set.subsys[ssid]);
} }
cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); err = sysfs_create_mount_point(fs_kobj, "cgroup");
if (!cgroup_kobj) if (err)
return -ENOMEM; return err;
err = register_filesystem(&cgroup_fs_type); err = register_filesystem(&cgroup_fs_type);
if (err < 0) { if (err < 0) {
kobject_put(cgroup_kobj); sysfs_remove_mount_point(fs_kobj, "cgroup");
return err; return err;
} }
......
...@@ -1538,12 +1538,6 @@ static struct ctl_table vm_table[] = { ...@@ -1538,12 +1538,6 @@ static struct ctl_table vm_table[] = {
{ } { }
}; };
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
static struct ctl_table binfmt_misc_table[] = {
{ }
};
#endif
static struct ctl_table fs_table[] = { static struct ctl_table fs_table[] = {
{ {
.procname = "inode-nr", .procname = "inode-nr",
...@@ -1697,7 +1691,7 @@ static struct ctl_table fs_table[] = { ...@@ -1697,7 +1691,7 @@ static struct ctl_table fs_table[] = {
{ {
.procname = "binfmt_misc", .procname = "binfmt_misc",
.mode = 0555, .mode = 0555,
.child = binfmt_misc_table, .child = sysctl_mount_point,
}, },
#endif #endif
{ {
......
...@@ -215,19 +215,17 @@ void securityfs_remove(struct dentry *dentry) ...@@ -215,19 +215,17 @@ void securityfs_remove(struct dentry *dentry)
} }
EXPORT_SYMBOL_GPL(securityfs_remove); EXPORT_SYMBOL_GPL(securityfs_remove);
static struct kobject *security_kobj;
static int __init securityfs_init(void) static int __init securityfs_init(void)
{ {
int retval; int retval;
security_kobj = kobject_create_and_add("security", kernel_kobj); retval = sysfs_create_mount_point(kernel_kobj, "security");
if (!security_kobj) if (retval)
return -EINVAL; return retval;
retval = register_filesystem(&fs_type); retval = register_filesystem(&fs_type);
if (retval) if (retval)
kobject_put(security_kobj); sysfs_remove_mount_point(kernel_kobj, "security");
return retval; return retval;
} }
......
...@@ -1853,7 +1853,6 @@ static struct file_system_type sel_fs_type = { ...@@ -1853,7 +1853,6 @@ static struct file_system_type sel_fs_type = {
}; };
struct vfsmount *selinuxfs_mount; struct vfsmount *selinuxfs_mount;
static struct kobject *selinuxfs_kobj;
static int __init init_sel_fs(void) static int __init init_sel_fs(void)
{ {
...@@ -1862,13 +1861,13 @@ static int __init init_sel_fs(void) ...@@ -1862,13 +1861,13 @@ static int __init init_sel_fs(void)
if (!selinux_enabled) if (!selinux_enabled)
return 0; return 0;
selinuxfs_kobj = kobject_create_and_add("selinux", fs_kobj); err = sysfs_create_mount_point(fs_kobj, "selinux");
if (!selinuxfs_kobj) if (err)
return -ENOMEM; return err;
err = register_filesystem(&sel_fs_type); err = register_filesystem(&sel_fs_type);
if (err) { if (err) {
kobject_put(selinuxfs_kobj); sysfs_remove_mount_point(fs_kobj, "selinux");
return err; return err;
} }
...@@ -1887,7 +1886,7 @@ __initcall(init_sel_fs); ...@@ -1887,7 +1886,7 @@ __initcall(init_sel_fs);
#ifdef CONFIG_SECURITY_SELINUX_DISABLE #ifdef CONFIG_SECURITY_SELINUX_DISABLE
void exit_sel_fs(void) void exit_sel_fs(void)
{ {
kobject_put(selinuxfs_kobj); sysfs_remove_mount_point(fs_kobj, "selinux");
kern_unmount(selinuxfs_mount); kern_unmount(selinuxfs_mount);
unregister_filesystem(&sel_fs_type); unregister_filesystem(&sel_fs_type);
} }
......
...@@ -2314,16 +2314,16 @@ static const struct file_operations smk_revoke_subj_ops = { ...@@ -2314,16 +2314,16 @@ static const struct file_operations smk_revoke_subj_ops = {
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
}; };
static struct kset *smackfs_kset;
/** /**
* smk_init_sysfs - initialize /sys/fs/smackfs * smk_init_sysfs - initialize /sys/fs/smackfs
* *
*/ */
static int smk_init_sysfs(void) static int smk_init_sysfs(void)
{ {
smackfs_kset = kset_create_and_add("smackfs", NULL, fs_kobj); int err;
if (!smackfs_kset) err = sysfs_create_mount_point(fs_kobj, "smackfs");
return -ENOMEM; if (err)
return err;
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment