Commit 8b0fdf63 authored by Linus Torvalds

Merge branch 'work.mqueue' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull mqueue/bpf vfs cleanups from Al Viro:
 "mqueue and bpf go through rather painful and similar contortions to
  create objects in their dentry trees. Provide a primitive for doing
  that without abusing ->mknod(), switch bpf and mqueue to it.

  Another mqueue-related thing that has ended up in that branch is
  on-demand creation of internal mount (based upon the work of Giuseppe
  Scrivano)"

* 'work.mqueue' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  mqueue: switch to on-demand creation of internal mount
  tidy do_mq_open() up a bit
  mqueue: clean prepare_open() up
  do_mq_open(): move all work prior to dentry_open() into a helper
  mqueue: fold mq_attr_ok() into mqueue_get_inode()
  move dentry_open() calls up into do_mq_open()
  mqueue: switch to vfs_mkobj(), quit abusing ->d_fsdata
  bpf_obj_do_pin(): switch to vfs_mkobj(), quit abusing ->mknod()
  new primitive: vfs_mkobj()
parents 168fe32a 36735a6a
...@@ -2895,6 +2895,27 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, ...@@ -2895,6 +2895,27 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
} }
EXPORT_SYMBOL(vfs_create); EXPORT_SYMBOL(vfs_create);
/*
 * vfs_mkobj - create a regular-file object in @dentry via a caller callback
 * @dentry: dentry to create the object at; its parent (->d_parent) is
 *          used as the containing directory
 * @mode:   requested permission bits; only the S_IALLUGO bits are kept and
 *          S_IFREG is forced on before anything else sees the value
 * @f:      callback that actually creates the object; it is handed @dentry,
 *          the sanitized mode and @arg
 * @arg:    opaque pointer passed through to @f untouched
 *
 * Runs the may_create() check and the security_inode_create() hook, then
 * calls @f.  fsnotify_create() is issued only when @f reports success.
 *
 * Returns 0 on success, otherwise the error from the first step that failed.
 *
 * NOTE(review): locking expectations on the parent directory are not
 * visible in this function -- confirm against the callers.
 */
int vfs_mkobj(struct dentry *dentry, umode_t mode,
	      int (*f)(struct dentry *, umode_t, void *),
	      void *arg)
{
	struct inode *parent = dentry->d_parent->d_inode;
	int err;

	err = may_create(parent, dentry);
	if (err)
		return err;

	/* Strip everything but permission bits; the object is always S_IFREG. */
	mode = (mode & S_IALLUGO) | S_IFREG;

	err = security_inode_create(parent, dentry, mode);
	if (err)
		return err;

	err = f(dentry, mode, arg);
	if (err)
		return err;

	fsnotify_create(parent, dentry);
	return 0;
}
EXPORT_SYMBOL(vfs_mkobj);
bool may_open_dev(const struct path *path) bool may_open_dev(const struct path *path)
{ {
return !(path->mnt->mnt_flags & MNT_NODEV) && return !(path->mnt->mnt_flags & MNT_NODEV) &&
......
...@@ -1608,6 +1608,10 @@ extern int vfs_whiteout(struct inode *, struct dentry *); ...@@ -1608,6 +1608,10 @@ extern int vfs_whiteout(struct inode *, struct dentry *);
extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode,
int open_flag); int open_flag);
int vfs_mkobj(struct dentry *, umode_t,
int (*f)(struct dentry *, umode_t, void *),
void *);
/* /*
* VFS file helper functions. * VFS file helper functions.
*/ */
......
...@@ -270,13 +270,30 @@ static struct inode *mqueue_get_inode(struct super_block *sb, ...@@ -270,13 +270,30 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
* that means the min(mq_maxmsg, max_priorities) * struct * that means the min(mq_maxmsg, max_priorities) * struct
* posix_msg_tree_node. * posix_msg_tree_node.
*/ */
ret = -EINVAL;
if (info->attr.mq_maxmsg <= 0 || info->attr.mq_msgsize <= 0)
goto out_inode;
if (capable(CAP_SYS_RESOURCE)) {
if (info->attr.mq_maxmsg > HARD_MSGMAX ||
info->attr.mq_msgsize > HARD_MSGSIZEMAX)
goto out_inode;
} else {
if (info->attr.mq_maxmsg > ipc_ns->mq_msg_max ||
info->attr.mq_msgsize > ipc_ns->mq_msgsize_max)
goto out_inode;
}
ret = -EOVERFLOW;
/* check for overflow */
if (info->attr.mq_msgsize > ULONG_MAX/info->attr.mq_maxmsg)
goto out_inode;
mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
sizeof(struct posix_msg_tree_node); sizeof(struct posix_msg_tree_node);
mq_bytes = info->attr.mq_maxmsg * info->attr.mq_msgsize;
mq_bytes = mq_treesize + (info->attr.mq_maxmsg * if (mq_bytes + mq_treesize < mq_bytes)
info->attr.mq_msgsize); goto out_inode;
mq_bytes += mq_treesize;
spin_lock(&mq_lock); spin_lock(&mq_lock);
if (u->mq_bytes + mq_bytes < u->mq_bytes || if (u->mq_bytes + mq_bytes < u->mq_bytes ||
u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) { u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) {
...@@ -308,8 +325,9 @@ static struct inode *mqueue_get_inode(struct super_block *sb, ...@@ -308,8 +325,9 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
static int mqueue_fill_super(struct super_block *sb, void *data, int silent) static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
{ {
struct inode *inode; struct inode *inode;
struct ipc_namespace *ns = sb->s_fs_info; struct ipc_namespace *ns = data;
sb->s_fs_info = ns;
sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
sb->s_blocksize = PAGE_SIZE; sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT; sb->s_blocksize_bits = PAGE_SHIFT;
...@@ -326,18 +344,44 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) ...@@ -326,18 +344,44 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent)
return 0; return 0;
} }
static struct file_system_type mqueue_fs_type;
/*
 * Return the internal mqueue mount of the caller's ipc namespace, creating
 * it on first use.
 *
 * The only reference pinning the returned mount is the one stored in
 * ->mq_mnt, so it lives until the ipc namespace dies and the caller must
 * not drop it.  If kern_mount_data() fails and nobody else has installed a
 * mount in the meantime, the ERR_PTR() it produced is returned and
 * ->mq_mnt is left unset.
 */
static struct vfsmount *mq_internal_mount(void)
{
	struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
	struct vfsmount *mnt = ipc_ns->mq_mnt;
	int raced;

	/* Fast path: the mount already exists. */
	if (mnt)
		return mnt;

	/* Create the mount outside the lock, then try to publish it. */
	mnt = kern_mount_data(&mqueue_fs_type, ipc_ns);

	spin_lock(&mq_lock);
	raced = !!ipc_ns->mq_mnt;
	if (!raced && !IS_ERR(mnt))
		ipc_ns->mq_mnt = mnt;
	spin_unlock(&mq_lock);

	if (unlikely(raced)) {
		/* Somebody else installed a mount first; discard ours. */
		if (!IS_ERR(mnt))
			kern_unmount(mnt);
		return ipc_ns->mq_mnt;
	}

	return mnt;
}
static struct dentry *mqueue_mount(struct file_system_type *fs_type, static struct dentry *mqueue_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, int flags, const char *dev_name,
void *data) void *data)
{ {
struct ipc_namespace *ns; struct vfsmount *m;
if (flags & SB_KERNMOUNT) { if (flags & SB_KERNMOUNT)
ns = data; return mount_nodev(fs_type, flags, data, mqueue_fill_super);
data = NULL; m = mq_internal_mount();
} else { if (IS_ERR(m))
ns = current->nsproxy->ipc_ns; return ERR_CAST(m);
} atomic_inc(&m->mnt_sb->s_active);
return mount_ns(fs_type, flags, data, ns, ns->user_ns, mqueue_fill_super); down_write(&m->mnt_sb->s_umount);
return dget(m->mnt_root);
} }
static void init_once(void *foo) static void init_once(void *foo)
...@@ -416,11 +460,11 @@ static void mqueue_evict_inode(struct inode *inode) ...@@ -416,11 +460,11 @@ static void mqueue_evict_inode(struct inode *inode)
put_ipc_ns(ipc_ns); put_ipc_ns(ipc_ns);
} }
static int mqueue_create(struct inode *dir, struct dentry *dentry, static int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg)
umode_t mode, bool excl)
{ {
struct inode *dir = dentry->d_parent->d_inode;
struct inode *inode; struct inode *inode;
struct mq_attr *attr = dentry->d_fsdata; struct mq_attr *attr = arg;
int error; int error;
struct ipc_namespace *ipc_ns; struct ipc_namespace *ipc_ns;
...@@ -461,6 +505,12 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry, ...@@ -461,6 +505,12 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry,
return error; return error;
} }
/*
 * ->create()-style entry point: forwards to mqueue_create_attr() with no
 * attribute argument.  @dir and @excl are not used here; the helper derives
 * the parent directory from @dentry itself.
 *
 * Returns whatever mqueue_create_attr() returns.
 */
static int mqueue_create(struct inode *dir, struct dentry *dentry,
			 umode_t mode, bool excl)
{
	void *no_attr = NULL;	/* caller supplied no mq_attr */

	return mqueue_create_attr(dentry, mode, no_attr);
}
static int mqueue_unlink(struct inode *dir, struct dentry *dentry) static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
{ {
struct inode *inode = d_inode(dentry); struct inode *inode = d_inode(dentry);
...@@ -691,96 +741,46 @@ static void remove_notification(struct mqueue_inode_info *info) ...@@ -691,96 +741,46 @@ static void remove_notification(struct mqueue_inode_info *info)
info->notify_user_ns = NULL; info->notify_user_ns = NULL;
} }
static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr) static int prepare_open(struct dentry *dentry, int oflag, int ro,
{ umode_t mode, struct filename *name,
int mq_treesize;
unsigned long total_size;
if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0)
return -EINVAL;
if (capable(CAP_SYS_RESOURCE)) {
if (attr->mq_maxmsg > HARD_MSGMAX ||
attr->mq_msgsize > HARD_MSGSIZEMAX)
return -EINVAL;
} else {
if (attr->mq_maxmsg > ipc_ns->mq_msg_max ||
attr->mq_msgsize > ipc_ns->mq_msgsize_max)
return -EINVAL;
}
/* check for overflow */
if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg)
return -EOVERFLOW;
mq_treesize = attr->mq_maxmsg * sizeof(struct msg_msg) +
min_t(unsigned int, attr->mq_maxmsg, MQ_PRIO_MAX) *
sizeof(struct posix_msg_tree_node);
total_size = attr->mq_maxmsg * attr->mq_msgsize;
if (total_size + mq_treesize < total_size)
return -EOVERFLOW;
return 0;
}
/*
* Invoked when creating a new queue via sys_mq_open
*/
static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir,
struct path *path, int oflag, umode_t mode,
struct mq_attr *attr) struct mq_attr *attr)
{
const struct cred *cred = current_cred();
int ret;
if (attr) {
ret = mq_attr_ok(ipc_ns, attr);
if (ret)
return ERR_PTR(ret);
/* store for use during create */
path->dentry->d_fsdata = attr;
} else {
struct mq_attr def_attr;
def_attr.mq_maxmsg = min(ipc_ns->mq_msg_max,
ipc_ns->mq_msg_default);
def_attr.mq_msgsize = min(ipc_ns->mq_msgsize_max,
ipc_ns->mq_msgsize_default);
ret = mq_attr_ok(ipc_ns, &def_attr);
if (ret)
return ERR_PTR(ret);
}
mode &= ~current_umask();
ret = vfs_create(dir, path->dentry, mode, true);
path->dentry->d_fsdata = NULL;
if (ret)
return ERR_PTR(ret);
return dentry_open(path, oflag, cred);
}
/* Opens existing queue */
static struct file *do_open(struct path *path, int oflag)
{ {
static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
MAY_READ | MAY_WRITE }; MAY_READ | MAY_WRITE };
int acc; int acc;
if (d_really_is_negative(dentry)) {
if (!(oflag & O_CREAT))
return -ENOENT;
if (ro)
return ro;
audit_inode_parent_hidden(name, dentry->d_parent);
return vfs_mkobj(dentry, mode & ~current_umask(),
mqueue_create_attr, attr);
}
/* it already existed */
audit_inode(name, dentry, 0);
if ((oflag & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
return -EEXIST;
if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
return ERR_PTR(-EINVAL); return -EINVAL;
acc = oflag2acc[oflag & O_ACCMODE]; acc = oflag2acc[oflag & O_ACCMODE];
if (inode_permission(d_inode(path->dentry), acc)) return inode_permission(d_inode(dentry), acc);
return ERR_PTR(-EACCES);
return dentry_open(path, oflag, current_cred());
} }
static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
struct mq_attr *attr) struct mq_attr *attr)
{ {
struct path path; struct vfsmount *mnt = mq_internal_mount();
struct file *filp; struct dentry *root;
struct filename *name; struct filename *name;
struct path path;
int fd, error; int fd, error;
struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
struct vfsmount *mnt = ipc_ns->mq_mnt;
struct dentry *root = mnt->mnt_root;
int ro; int ro;
if (IS_ERR(mnt))
return PTR_ERR(mnt);
audit_mq_open(oflag, mode, attr); audit_mq_open(oflag, mode, attr);
if (IS_ERR(name = getname(u_name))) if (IS_ERR(name = getname(u_name)))
...@@ -791,7 +791,7 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, ...@@ -791,7 +791,7 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
goto out_putname; goto out_putname;
ro = mnt_want_write(mnt); /* we'll drop it in any case */ ro = mnt_want_write(mnt); /* we'll drop it in any case */
error = 0; root = mnt->mnt_root;
inode_lock(d_inode(root)); inode_lock(d_inode(root));
path.dentry = lookup_one_len(name->name, root, strlen(name->name)); path.dentry = lookup_one_len(name->name, root, strlen(name->name));
if (IS_ERR(path.dentry)) { if (IS_ERR(path.dentry)) {
...@@ -799,38 +799,14 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode, ...@@ -799,38 +799,14 @@ static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
goto out_putfd; goto out_putfd;
} }
path.mnt = mntget(mnt); path.mnt = mntget(mnt);
error = prepare_open(path.dentry, oflag, ro, mode, name, attr);
if (oflag & O_CREAT) { if (!error) {
if (d_really_is_positive(path.dentry)) { /* entry already exists */ struct file *file = dentry_open(&path, oflag, current_cred());
audit_inode(name, path.dentry, 0); if (!IS_ERR(file))
if (oflag & O_EXCL) { fd_install(fd, file);
error = -EEXIST; else
goto out; error = PTR_ERR(file);
}
filp = do_open(&path, oflag);
} else {
if (ro) {
error = ro;
goto out;
}
audit_inode_parent_hidden(name, root);
filp = do_create(ipc_ns, d_inode(root), &path,
oflag, mode, attr);
}
} else {
if (d_really_is_negative(path.dentry)) {
error = -ENOENT;
goto out;
}
audit_inode(name, path.dentry, 0);
filp = do_open(&path, oflag);
} }
if (!IS_ERR(filp))
fd_install(fd, filp);
else
error = PTR_ERR(filp);
out:
path_put(&path); path_put(&path);
out_putfd: out_putfd:
if (error) { if (error) {
...@@ -864,6 +840,9 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) ...@@ -864,6 +840,9 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
struct vfsmount *mnt = ipc_ns->mq_mnt; struct vfsmount *mnt = ipc_ns->mq_mnt;
if (!mnt)
return -ENOENT;
name = getname(u_name); name = getname(u_name);
if (IS_ERR(name)) if (IS_ERR(name))
return PTR_ERR(name); return PTR_ERR(name);
...@@ -1590,28 +1569,26 @@ int mq_init_ns(struct ipc_namespace *ns) ...@@ -1590,28 +1569,26 @@ int mq_init_ns(struct ipc_namespace *ns)
ns->mq_msgsize_max = DFLT_MSGSIZEMAX; ns->mq_msgsize_max = DFLT_MSGSIZEMAX;
ns->mq_msg_default = DFLT_MSG; ns->mq_msg_default = DFLT_MSG;
ns->mq_msgsize_default = DFLT_MSGSIZE; ns->mq_msgsize_default = DFLT_MSGSIZE;
ns->mq_mnt = NULL;
ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns);
if (IS_ERR(ns->mq_mnt)) {
int err = PTR_ERR(ns->mq_mnt);
ns->mq_mnt = NULL;
return err;
}
return 0; return 0;
} }
void mq_clear_sbinfo(struct ipc_namespace *ns) void mq_clear_sbinfo(struct ipc_namespace *ns)
{ {
ns->mq_mnt->mnt_sb->s_fs_info = NULL; if (ns->mq_mnt)
ns->mq_mnt->mnt_sb->s_fs_info = NULL;
} }
void mq_put_mnt(struct ipc_namespace *ns) void mq_put_mnt(struct ipc_namespace *ns)
{ {
kern_unmount(ns->mq_mnt); if (ns->mq_mnt)
kern_unmount(ns->mq_mnt);
} }
static int __init init_mqueue_fs(void) static int __init init_mqueue_fs(void)
{ {
struct vfsmount *m;
int error; int error;
mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache", mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache",
...@@ -1633,6 +1610,10 @@ static int __init init_mqueue_fs(void) ...@@ -1633,6 +1610,10 @@ static int __init init_mqueue_fs(void)
if (error) if (error)
goto out_filesystem; goto out_filesystem;
m = kern_mount_data(&mqueue_fs_type, &init_ipc_ns);
if (IS_ERR(m))
goto out_filesystem;
init_ipc_ns.mq_mnt = m;
return 0; return 0;
out_filesystem: out_filesystem:
......
...@@ -150,39 +150,29 @@ static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ...@@ -150,39 +150,29 @@ static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
return 0; return 0;
} }
static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, static int bpf_mkobj_ops(struct dentry *dentry, umode_t mode, void *raw,
umode_t mode, const struct inode_operations *iops) const struct inode_operations *iops)
{ {
struct inode *inode; struct inode *dir = dentry->d_parent->d_inode;
struct inode *inode = bpf_get_inode(dir->i_sb, dir, mode);
inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG);
if (IS_ERR(inode)) if (IS_ERR(inode))
return PTR_ERR(inode); return PTR_ERR(inode);
inode->i_op = iops; inode->i_op = iops;
inode->i_private = dentry->d_fsdata; inode->i_private = raw;
bpf_dentry_finalize(dentry, inode, dir); bpf_dentry_finalize(dentry, inode, dir);
return 0; return 0;
} }
static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode, static int bpf_mkprog(struct dentry *dentry, umode_t mode, void *arg)
dev_t devt)
{ {
enum bpf_type type = MINOR(devt); return bpf_mkobj_ops(dentry, mode, arg, &bpf_prog_iops);
}
if (MAJOR(devt) != UNNAMED_MAJOR || !S_ISREG(mode) ||
dentry->d_fsdata == NULL)
return -EPERM;
switch (type) { static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
case BPF_TYPE_PROG: {
return bpf_mkobj_ops(dir, dentry, mode, &bpf_prog_iops); return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops);
case BPF_TYPE_MAP:
return bpf_mkobj_ops(dir, dentry, mode, &bpf_map_iops);
default:
return -EPERM;
}
} }
static struct dentry * static struct dentry *
...@@ -218,7 +208,6 @@ static int bpf_symlink(struct inode *dir, struct dentry *dentry, ...@@ -218,7 +208,6 @@ static int bpf_symlink(struct inode *dir, struct dentry *dentry,
static const struct inode_operations bpf_dir_iops = { static const struct inode_operations bpf_dir_iops = {
.lookup = bpf_lookup, .lookup = bpf_lookup,
.mknod = bpf_mkobj,
.mkdir = bpf_mkdir, .mkdir = bpf_mkdir,
.symlink = bpf_symlink, .symlink = bpf_symlink,
.rmdir = simple_rmdir, .rmdir = simple_rmdir,
...@@ -234,7 +223,6 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw, ...@@ -234,7 +223,6 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw,
struct inode *dir; struct inode *dir;
struct path path; struct path path;
umode_t mode; umode_t mode;
dev_t devt;
int ret; int ret;
dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0); dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0);
...@@ -242,9 +230,8 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw, ...@@ -242,9 +230,8 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw,
return PTR_ERR(dentry); return PTR_ERR(dentry);
mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask());
devt = MKDEV(UNNAMED_MAJOR, type);
ret = security_path_mknod(&path, dentry, mode, devt); ret = security_path_mknod(&path, dentry, mode, 0);
if (ret) if (ret)
goto out; goto out;
...@@ -254,9 +241,16 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw, ...@@ -254,9 +241,16 @@ static int bpf_obj_do_pin(const struct filename *pathname, void *raw,
goto out; goto out;
} }
dentry->d_fsdata = raw; switch (type) {
ret = vfs_mknod(dir, dentry, mode, devt); case BPF_TYPE_PROG:
dentry->d_fsdata = NULL; ret = vfs_mkobj(dentry, mode, bpf_mkprog, raw);
break;
case BPF_TYPE_MAP:
ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw);
break;
default:
ret = -EPERM;
}
out: out:
done_path_create(&path, dentry); done_path_create(&path, dentry);
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment