Commit 1d5ab0e9 authored by Seth Forshee

loop: Add loop filesystem

Add limited capability for use of loop devices in non-root
containers via a loopfs pseudo fs. When mounted this filesystem
will contain only a loop-control device node. This can be used
to request free loop devices which will be "owned" by that mount.
Device nodes appear automatically for these devices, and the same
device will not be given to another loopfs mount. Privileged loop
ioctls (for encrypted loop) will be allowed within the namespace
which mounted the loopfs.

Privileged block ioctls are not permitted, so features such as
partitions are not supported for unprivileged users.
Signed-off-by: Seth Forshee <seth.forshee@canonical.com>
parent 87cad790
......@@ -44,6 +44,7 @@ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
obj-$(CONFIG_ZRAM) += zram/
loop-y := loop-core.o loopfs.o
nvme-y := nvme-core.o nvme-scsi.o
skd-y := skd_main.o
swim_mod-y := swim.o swim_asm.o
/*
* linux/drivers/block/loop.c
* linux/drivers/block/loop-core.c
*
* Written by Theodore Ts'o, 3/29/93
*
......@@ -843,10 +843,6 @@ static int loop_clr_fd(struct loop_device *lo)
if (lo->lo_state != Lo_bound)
return -ENXIO;
/* Fail if any partitions open */
if (bdev && bdev->bd_part_count > 0)
return -EBUSY;
/*
* If we've explicitly asked to tear down the loop device,
* and it has an elevated reference count, set it for auto-teardown when
......@@ -1554,7 +1550,7 @@ static struct blk_mq_ops loop_mq_ops = {
.init_request = loop_init_request,
};
static int loop_add(struct loop_device **l, int i)
static int loop_add(struct loop_device **l, int i, struct inode *inode)
{
struct loop_device *lo;
struct gendisk *disk;
......@@ -1638,6 +1634,14 @@ static int loop_add(struct loop_device **l, int i)
disk->queue = lo->lo_queue;
sprintf(disk->disk_name, "loop%d", i);
add_disk(disk);
lo->loopfs_inode = loopfs_new_dev(inode, disk_devt(disk),
lo->lo_number);
if (IS_ERR(lo->loopfs_inode)) {
pr_warn("Unable to create loopfs inode\n");
lo->loopfs_inode = NULL;
}
*l = lo;
return lo->lo_number;
......@@ -1655,6 +1659,8 @@ static int loop_add(struct loop_device **l, int i)
static void loop_remove(struct loop_device *lo)
{
loopfs_kill_dev(lo->loopfs_inode);
lo->loopfs_inode = NULL;
blk_cleanup_queue(lo->lo_queue);
del_gendisk(lo->lo_disk);
blk_mq_free_tag_set(&lo->tag_set);
......@@ -1662,27 +1668,40 @@ static void loop_remove(struct loop_device *lo)
kfree(lo);
}
/*
 * Argument bundle handed to find_free_cb() through idr_for_each():
 * where to store the matching device, and the inode that identifies
 * the loopfs mount on whose behalf the search is made.
 */
struct find_free_cb_data {
/* out: receives the loop device that was found */
struct loop_device **l;
/* in: inode of the caller; compared via loopfs_sb_from_inode() */
struct inode *inode;
};
static int find_free_cb(int id, void *ptr, void *data)
{
struct loop_device *lo = ptr;
struct loop_device **l = data;
struct find_free_cb_data *cb_data = data;
if (lo->lo_state == Lo_unbound) {
*l = lo;
return 1;
}
return 0;
if (lo->lo_state != Lo_unbound)
return 0;
/* Don't return a device added from a different loopfs mount */
if (loopfs_sb_from_inode(cb_data->inode) !=
loopfs_sb_from_inode(lo->loopfs_inode))
return 0;
*cb_data->l = lo;
return 1;
}
static int loop_lookup(struct loop_device **l, int i)
static int loop_lookup(struct loop_device **l, int i, struct inode *inode)
{
struct loop_device *lo;
int ret = -ENODEV;
if (i < 0) {
struct find_free_cb_data cb_data;
int err;
err = idr_for_each(&loop_index_idr, &find_free_cb, &lo);
cb_data.l = &lo;
cb_data.inode = inode;
err = idr_for_each(&loop_index_idr, &find_free_cb, &cb_data);
if (err == 1) {
*l = lo;
ret = lo->lo_number;
......@@ -1693,13 +1712,69 @@ static int loop_lookup(struct loop_device **l, int i)
/* lookup and return a specific i */
lo = idr_find(&loop_index_idr, i);
if (lo) {
*l = lo;
ret = lo->lo_number;
if (loopfs_sb_from_inode(inode) !=
loopfs_sb_from_inode(lo->loopfs_inode)) {
ret = -EACCES;
} else {
*l = lo;
ret = lo->lo_number;
}
}
out:
return ret;
}
/*
 * Release the loop device associated with the given loopfs inode.
 * If the block device is open we may not be able to release the
 * loop device; in that case the loop device is dissociated from
 * its loopfs superblock.
 *
 * The device is located from the minor number stored in @inode. If
 * the lookup fails the function returns without doing anything.
 *
 * NOTE(review): loop_index_mutex is re-acquired below while
 * lo_ctl_mutex is still held, whereas the initial acquisition order is
 * loop_index_mutex then lo_ctl_mutex. This looks like a possible lock
 * order inversion -- confirm against the other lock users.
 */
void loop_release_device(struct inode *inode)
{
struct block_device *bdev;
struct loop_device *lo;
bool got_bdev = false;
int err;
mutex_lock(&loop_index_mutex);
err = loop_lookup(&lo, MINOR(inode->i_rdev) >> part_shift, inode);
if (err < 0) {
mutex_unlock(&loop_index_mutex);
return;
}
mutex_lock(&lo->lo_ctl_mutex);
/* Cannot hold loop_index_mutex during blkdev_get */
mutex_unlock(&loop_index_mutex);
bdev = lo->lo_device;
if (bdev) {
/* Take a bdev reference, then try to open it for the teardown below */
bdgrab(bdev);
got_bdev = !blkdev_get(bdev, FMODE_READ, NULL);
}
mutex_lock(&loop_index_mutex);
if (got_bdev) {
/*
 * lo_ctl_mutex is only dropped here when loop_clr_fd() fails;
 * presumably loop_clr_fd() releases it itself on success --
 * TODO confirm against loop_clr_fd().
 */
if (loop_clr_fd(lo))
mutex_unlock(&lo->lo_ctl_mutex);
/* Cannot hold lo_ctl_mutex during blkdev_put */
blkdev_put(bdev, FMODE_READ);
mutex_lock(&lo->lo_ctl_mutex);
} else if (bdev) {
/* Open failed: drop the reference taken by bdgrab() */
bdput(bdev);
}
if (lo->lo_state == Lo_unbound && atomic_read(&lo->lo_refcnt) == 0) {
/* Unbound and unreferenced: remove the loop device entirely */
lo->lo_disk->private_data = NULL;
mutex_unlock(&lo->lo_ctl_mutex);
idr_remove(&loop_index_idr, lo->lo_number);
loop_remove(lo);
} else {
/* Still in use: only detach the device from its loopfs inode */
mutex_unlock(&lo->lo_ctl_mutex);
loopfs_kill_dev(lo->loopfs_inode);
lo->loopfs_inode = NULL;
}
mutex_unlock(&loop_index_mutex);
}
static struct kobject *loop_probe(dev_t dev, int *part, void *data)
{
struct loop_device *lo;
......@@ -1707,9 +1782,9 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data)
int err;
mutex_lock(&loop_index_mutex);
err = loop_lookup(&lo, MINOR(dev) >> part_shift);
err = loop_lookup(&lo, MINOR(dev) >> part_shift, NULL);
if (err < 0)
err = loop_add(&lo, MINOR(dev) >> part_shift);
err = loop_add(&lo, MINOR(dev) >> part_shift, NULL);
if (err < 0)
kobj = NULL;
else
......@@ -1723,21 +1798,22 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data)
static long loop_control_ioctl(struct file *file, unsigned int cmd,
unsigned long parm)
{
struct inode *inode = file_inode(file);
struct loop_device *lo;
int ret = -ENOSYS;
mutex_lock(&loop_index_mutex);
switch (cmd) {
case LOOP_CTL_ADD:
ret = loop_lookup(&lo, parm);
ret = loop_lookup(&lo, parm, inode);
if (ret >= 0) {
ret = -EEXIST;
break;
}
ret = loop_add(&lo, parm);
ret = loop_add(&lo, parm, inode);
break;
case LOOP_CTL_REMOVE:
ret = loop_lookup(&lo, parm);
ret = loop_lookup(&lo, parm, inode);
if (ret < 0)
break;
mutex_lock(&lo->lo_ctl_mutex);
......@@ -1757,10 +1833,10 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
loop_remove(lo);
break;
case LOOP_CTL_GET_FREE:
ret = loop_lookup(&lo, -1);
ret = loop_lookup(&lo, -1, inode);
if (ret >= 0)
break;
ret = loop_add(&lo, -1);
ret = loop_add(&lo, -1, inode);
}
mutex_unlock(&loop_index_mutex);
......@@ -1847,9 +1923,12 @@ static int __init loop_init(void)
/* pre-create number of devices given by config or max_loop */
mutex_lock(&loop_index_mutex);
for (i = 0; i < nr; i++)
loop_add(&lo, i);
loop_add(&lo, i, NULL);
mutex_unlock(&loop_index_mutex);
if (loop_init())
pr_warn("Failed to initialize loopfs filesystem\n");
printk(KERN_INFO "loop: module loaded\n");
return 0;
......@@ -1870,6 +1949,8 @@ static void __exit loop_exit(void)
{
unsigned long range;
loopfs_exit();
range = max_loop ? max_loop << part_shift : 1UL << MINORBITS;
idr_for_each(&loop_index_idr, &loop_exit_cb, NULL);
......
......@@ -15,6 +15,7 @@
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/fs.h>
#include <uapi/linux/loop.h>
/* Possible states of device */
......@@ -64,6 +65,8 @@ struct loop_device {
struct request_queue *lo_queue;
struct blk_mq_tag_set tag_set;
struct gendisk *lo_disk;
struct inode *loopfs_inode;
};
struct loop_cmd {
......@@ -89,4 +92,13 @@ struct loop_func_table {
int loop_register_transfer(struct loop_func_table *funcs);
int loop_unregister_transfer(int number);
/* loopfs pseudo filesystem support */
/*
 * Superblock of @inode if it lives on a loopfs mount, otherwise the
 * superblock of the kernel-internal loopfs mount.
 */
struct super_block *loopfs_sb_from_inode(struct inode *inode);
/* Create the block device node for loop device @lo_number in loopfs */
struct inode *loopfs_new_dev(struct inode *ref_inode, dev_t device,
int lo_number);
/* Remove a node created by loopfs_new_dev(); a NULL @inode is ignored */
void loopfs_kill_dev(struct inode *inode);
/* Register loopfs and create its kernel-internal mount */
int loopfs_init(void);
/* Drop the kernel-internal mount and unregister loopfs */
void loopfs_exit(void);
/* Release (or detach) the loop device behind a loopfs inode */
void loop_release_device(struct inode *inode);
#endif
/*
* drivers/block/loopfs.c
*
* Copyright (C) 2014-2015 Canonical Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mount.h>
#include <linux/magic.h>
#include <linux/major.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/parser.h>
#include <linux/fsnotify.h>
#include "loop.h"
/* Kernel-internal loopfs mount, created by loopfs_init() via kern_mount() */
static struct vfsmount *loopfs_mnt;
/* Options parsed from the mount data string */
struct loop_mount_opts {
/* true when the "hostmount" option was given (and permitted) */
bool host_mount;
};
/* Per-superblock private data, stored in sb->s_fs_info */
struct loop_fs_info {
/* dentry of the "loop-control" node, set once it has been created */
struct dentry *control_dentry;
/* options the superblock was last mounted with */
struct loop_mount_opts opts;
/* uid/gid 0 of the mounting user namespace, used as owner of the nodes */
kuid_t root_uid;
kgid_t root_gid;
};
/* Token ids for the mount option parser */
enum {
opt_hostmount,
opt_err
};
/* Mount option table consumed by match_token() */
static const match_table_t tokens = {
{opt_hostmount, "hostmount"},
{opt_err, NULL}
};
/* Fetch the loopfs private data attached to superblock @sb. */
static inline struct loop_fs_info *LOOPFS_SB(struct super_block *sb)
{
	struct loop_fs_info *fsi = sb->s_fs_info;

	return fsi;
}
struct super_block *loopfs_sb_from_inode(struct inode *inode)
{
if (inode && inode->i_sb->s_magic == LOOPFS_SUPER_MAGIC)
return inode->i_sb;
return loopfs_mnt->mnt_sb;
}
/*
 * Create the "loop-control" character device node in the root directory
 * of @sb, unless the superblock already has one recorded.
 *
 * The node is owned by the root uid/gid stored in the superblock's
 * loop_fs_info and is readable/writable by its owner only.
 *
 * Returns 0 on success or if the node already exists, -ENOMEM if the
 * dentry or the inode cannot be allocated.
 */
static int mknod_loop_control(struct super_block *sb)
{
	struct loop_fs_info *fsi = LOOPFS_SB(sb);
	struct dentry *root = sb->s_root;
	struct dentry *dentry;
	struct inode *inode;
	int ret = 0;

	mutex_lock(&root->d_inode->i_mutex);

	/* Nothing to do when the node was created by an earlier mount */
	if (fsi->control_dentry)
		goto out;

	dentry = d_alloc_name(root, "loop-control");
	if (!dentry) {
		pr_notice("Unable to allocate dentry for loop-control\n");
		ret = -ENOMEM;
		goto out;
	}

	inode = new_inode(sb);
	if (!inode) {
		pr_notice("Unable to allocate inode for loop-control\n");
		dput(dentry);
		ret = -ENOMEM;
		goto out;
	}

	inode->i_ino = 2;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
	init_special_inode(inode, S_IFCHR | S_IRUSR | S_IWUSR,
			   MKDEV(MISC_MAJOR, LOOP_CTRL_MINOR));
	inode->i_uid = fsi->root_uid;
	inode->i_gid = fsi->root_gid;

	d_add(dentry, inode);
	fsi->control_dentry = dentry;
out:
	mutex_unlock(&root->d_inode->i_mutex);
	return ret;
}
/* Superblock operations for loopfs; only statfs is provided */
static const struct super_operations loopfs_sops = {
.statfs = simple_statfs,
};
static int parse_mount_options(char *data, struct loop_mount_opts *opts)
{
char *p;
opts->host_mount = false;
while ((p = strsep(&data, ",")) != NULL) {
substring_t args[MAX_OPT_ARGS];
int token;
if (!*p)
continue;
token = match_token(p, tokens, args);
switch (token) {
case opt_hostmount:
opts->host_mount = true;
break;
default:
pr_err("loopfs: invalid mount options\n");
return -EINVAL;
}
}
return 0;
}
/*
 * Initialise a freshly allocated loopfs superblock @s: set the basic
 * superblock parameters, allocate the private loop_fs_info and create
 * the root directory.
 *
 * The owner recorded for device nodes is uid/gid 0 of the mounting
 * user namespace, falling back to the global root ids when that mapping
 * is not valid.
 *
 * @data and @silent are unused.
 *
 * Returns 0 on success and -ENOMEM on any allocation failure. On
 * failure s->s_fs_info is left NULL so that the superblock teardown
 * does not free the private data a second time.
 */
static int loopfs_fill_super(struct super_block *s, void *data, int silent)
{
	struct loop_fs_info *fsi;
	struct inode *inode;

	s->s_blocksize = 1024;
	s->s_blocksize_bits = 10;
	s->s_magic = LOOPFS_SUPER_MAGIC;
	s->s_op = &loopfs_sops;
	s->s_time_gran = 1;

	fsi = kzalloc(sizeof(*fsi), GFP_KERNEL);
	if (!fsi)
		return -ENOMEM;
	s->s_fs_info = fsi;

	fsi->root_uid = make_kuid(current_user_ns(), 0);
	if (!uid_valid(fsi->root_uid))
		fsi->root_uid = GLOBAL_ROOT_UID;
	fsi->root_gid = make_kgid(current_user_ns(), 0);
	if (!gid_valid(fsi->root_gid))
		fsi->root_gid = GLOBAL_ROOT_GID;

	inode = new_inode(s);
	if (!inode)
		goto cleanup;

	inode->i_ino = 1;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
	inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
	inode->i_op = &simple_dir_inode_operations;
	inode->i_fop = &simple_dir_operations;
	set_nlink(inode, 2);

	/*
	 * d_make_root() consumes the inode reference even when it fails,
	 * so the error path must not iput() the inode again.
	 */
	s->s_root = d_make_root(inode);
	if (s->s_root)
		return 0;

cleanup:
	/* Clear the pointer first: kill_sb also frees s_fs_info */
	s->s_fs_info = NULL;
	kfree(fsi);
	return -ENOMEM;
}
/*
 * sget() test callback: matches only the superblock of the
 * kernel-internal loopfs mount. Never matches when that mount has not
 * been set. @p is unused.
 */
static int compare_init_loop_sb(struct super_block *s, void *p)
{
	return loopfs_mnt ? loopfs_mnt->mnt_sb == s : 0;
}
/*
 * Mount callback for loopfs.
 *
 * Parses the mount options, obtains a superblock (the one of the
 * kernel-internal mount when "hostmount" is in effect, a new anonymous
 * one otherwise), fills it in if it is new and makes sure the
 * loop-control node exists.
 *
 * Returns a referenced root dentry, or an ERR_PTR on failure.
 */
static struct dentry *loopfs_mount(struct file_system_type *fs_type,
				   int flags, const char *dev_name,
				   void *data)
{
	int (*test)(struct super_block *, void *) = NULL;
	struct loop_mount_opts opts;
	struct super_block *sb;
	int err;

	err = parse_mount_options(data, &opts);
	if (err)
		return ERR_PTR(err);

	/*
	 * hostmount is only available for system-wide CAP_SYS_ADMIN;
	 * drop it otherwise.
	 */
	if (opts.host_mount && !capable(CAP_SYS_ADMIN)) {
		pr_notice("loopfs: dropping hostmount option for unprivileged user\n");
		opts.host_mount = false;
	}

	/* Only a host mount may share the kernel-internal superblock */
	if (opts.host_mount)
		test = compare_init_loop_sb;

	sb = sget(fs_type, test, set_anon_super, flags, NULL);
	if (IS_ERR(sb))
		return ERR_CAST(sb);

	if (!sb->s_root) {
		err = loopfs_fill_super(sb, data, (flags & MS_SILENT) != 0);
		if (err)
			goto cleanup;
		sb->s_flags |= MS_ACTIVE;
	}

	LOOPFS_SB(sb)->opts = opts;

	err = mknod_loop_control(sb);
	if (err)
		goto cleanup;

	return dget(sb->s_root);

cleanup:
	deactivate_locked_super(sb);
	return ERR_PTR(err);
}
/*
 * Tear down a loopfs superblock: release every loop device that has a
 * node on it, free the private data and let kill_litter_super() do the
 * rest.
 */
static void loopfs_kill_sb(struct super_block *sb)
{
	struct inode *cur, *next;

	/*
	 * Release any loop devices associated with this superblock.
	 * loop_release_device() hands the inode back to loopfs_kill_dev(),
	 * after which it will likely be evicted, hence the _safe iterator.
	 */
	list_for_each_entry_safe(cur, next, &sb->s_inodes, i_sb_list) {
		if (!S_ISBLK(cur->i_mode))
			continue;
		if (imajor(cur) != LOOP_MAJOR)
			continue;
		loop_release_device(cur);
	}

	kfree(LOOPFS_SB(sb));
	kill_litter_super(sb);
}
/*
 * Filesystem type registered by loopfs_init(). The fs_flags request
 * that the filesystem be mountable from user namespaces, with device
 * nodes usable on such mounts.
 */
static struct file_system_type loopfs_fs_type = {
.name = "loopfs",
.mount = loopfs_mount,
.kill_sb = loopfs_kill_sb,
.fs_flags = FS_USERNS_MOUNT | FS_USERNS_DEV_MOUNT,
};
/**
 * loopfs_new_dev -- create the node for a new loop device in loopfs
 * @ref_inode: inode used to pick the superblock in which the node is
 *             created (resolved through loopfs_sb_from_inode()).
 *             Usually the loop-control inode.
 * @device: major+minor of the node to be created; the major must be
 *          LOOP_MAJOR
 * @lo_number: index of the new loop device, used as the node name
 *
 * The node is a block special file with mode 0660, owned by the root
 * uid/gid recorded in the superblock.
 *
 * Returns the new inode, which can later be removed again with
 * loopfs_kill_dev(). Returns NULL if no superblock could be determined,
 * ERR_PTR(-EINVAL) for a non-loop major or an over-long name, and
 * ERR_PTR(-ENOMEM) if the inode or dentry cannot be allocated.
 */
struct inode *loopfs_new_dev(struct inode *ref_inode, dev_t device,
			     int lo_number)
{
	struct super_block *sb = loopfs_sb_from_inode(ref_inode);
	struct dentry *root, *entry;
	struct loop_fs_info *fsi;
	struct inode *node;
	char name[12];

	if (MAJOR(device) != LOOP_MAJOR)
		return ERR_PTR(-EINVAL);
	if (!sb)
		return NULL;
	if (snprintf(name, sizeof(name), "%d", lo_number) >= sizeof(name))
		return ERR_PTR(-EINVAL);

	root = sb->s_root;
	fsi = LOOPFS_SB(sb);

	node = new_inode(sb);
	if (!node)
		return ERR_PTR(-ENOMEM);

	node->i_ino = MINOR(device) + 3;
	node->i_uid = fsi->root_uid;
	node->i_gid = fsi->root_gid;
	node->i_mtime = node->i_atime = node->i_ctime = CURRENT_TIME;
	init_special_inode(node, S_IFBLK | 0660, device);

	mutex_lock(&root->d_inode->i_mutex);
	entry = d_alloc_name(root, name);
	if (!entry) {
		/* No dentry: give the inode back and report the failure */
		iput(node);
		node = ERR_PTR(-ENOMEM);
	} else {
		d_add(entry, node);
		fsnotify_create(root->d_inode, entry);
	}
	mutex_unlock(&root->d_inode->i_mutex);

	return node;
}
/**
 * loopfs_kill_dev -- remove a loop device node from loopfs
 * @inode: inode of loop device to be removed; may be NULL
 *
 * Kill an inode created by loopfs_new_dev(). A NULL @inode, or one that
 * is not a block device with major LOOP_MAJOR, is silently ignored.
 */
void loopfs_kill_dev(struct inode *inode)
{
	struct dentry *root, *dentry;

	if (!inode)
		return;
	if (!S_ISBLK(inode->i_mode) || imajor(inode) != LOOP_MAJOR)
		return;

	root = loopfs_sb_from_inode(inode)->s_root;

	mutex_lock(&root->d_inode->i_mutex);

	dentry = d_find_alias(inode);
	drop_nlink(inode);

	/* d_find_alias() returns NULL when the inode has no usable alias */
	if (dentry) {
		d_delete(dentry);
		dput(dentry);	/* for d_alloc_name() in loopfs_new_dev() */
		dput(dentry);	/* for d_find_alias() above */
	}

	mutex_unlock(&root->d_inode->i_mutex);
}
/*
 * Register the loopfs filesystem type and create its kernel-internal
 * mount.
 *
 * loopfs_mnt is only assigned once the mount has succeeded, so that it
 * is always either NULL or a valid mount and never an error pointer.
 *
 * Returns 0 on success or a negative errno; on failure the filesystem
 * type is left unregistered.
 */
int __init loopfs_init(void)
{
	struct vfsmount *mnt;
	int ret;

	ret = register_filesystem(&loopfs_fs_type);
	if (ret)
		return ret;

	mnt = kern_mount(&loopfs_fs_type);
	if (IS_ERR(mnt)) {
		unregister_filesystem(&loopfs_fs_type);
		return PTR_ERR(mnt);
	}

	loopfs_mnt = mnt;
	return 0;
}
/*
 * Undo loopfs_init(): drop the kernel-internal mount, if there is one,
 * and unregister the filesystem type.
 */
void __exit loopfs_exit(void)
{
	struct vfsmount *mnt = loopfs_mnt;

	if (mnt != NULL) {
		kern_unmount(mnt);
		loopfs_mnt = NULL;
	}

	unregister_filesystem(&loopfs_fs_type);
}
......@@ -65,6 +65,7 @@
#define BDEVFS_MAGIC 0x62646576
#define BINFMTFS_MAGIC 0x42494e4d
#define DEVPTS_SUPER_MAGIC 0x1cd1
#define LOOPFS_SUPER_MAGIC 0x6c6f6f70
#define FUTEXFS_SUPER_MAGIC 0xBAD1DEA
#define PIPEFS_MAGIC 0x50495045
#define PROC_SUPER_MAGIC 0x9fa0
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment