Commit c0abe5af authored by Al Viro

Merge branches 'work.mount', 'work.misc' and 'work.iov_iter' into for-next

...@@ -259,7 +259,6 @@ ForEachMacros: ...@@ -259,7 +259,6 @@ ForEachMacros:
- 'idr_for_each_entry_ul' - 'idr_for_each_entry_ul'
- 'inet_bind_bucket_for_each' - 'inet_bind_bucket_for_each'
- 'inet_lhash2_for_each_icsk_rcu' - 'inet_lhash2_for_each_icsk_rcu'
- 'iov_for_each'
- 'key_for_each' - 'key_for_each'
- 'key_for_each_safe' - 'key_for_each_safe'
- 'klp_for_each_func' - 'klp_for_each_func'
......
This diff is collapsed.
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/kernfs.h> #include <linux/kernfs.h>
#include <linux/fs_context.h>
#include <linux/jump_label.h> #include <linux/jump_label.h>
#define MSR_IA32_L3_QOS_CFG 0xc81 #define MSR_IA32_L3_QOS_CFG 0xc81
...@@ -40,6 +41,21 @@ ...@@ -40,6 +41,21 @@
#define RMID_VAL_ERROR BIT_ULL(63) #define RMID_VAL_ERROR BIT_ULL(63)
#define RMID_VAL_UNAVAIL BIT_ULL(62) #define RMID_VAL_UNAVAIL BIT_ULL(62)
/*
 * resctrl's filesystem context: the mount options collected by
 * rdt_parse_param() and later applied by rdt_enable_ctx().  Allocated in
 * rdt_init_fs_context() and released in rdt_fs_context_free().
 */
struct rdt_fs_context {
/* Embedded kernfs context; fc->fs_private points at this member. */
struct kernfs_fs_context kfc;
/* Set for Opt_cdpl2; makes rdt_enable_ctx() call cdpl2_enable(). */
bool enable_cdpl2;
/* Set for Opt_cdp; makes rdt_enable_ctx() call cdpl3_enable(). */
bool enable_cdpl3;
/* Set for Opt_mba_mpbs; makes rdt_enable_ctx() call set_mba_sc(true). */
bool enable_mba_mbps;
};
/*
 * Map a generic fs_context back to the resctrl context that embeds the
 * kernfs_fs_context stored in fc->fs_private.
 */
static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
{
	struct kernfs_fs_context *embedded = fc->fs_private;

	return container_of(embedded, struct rdt_fs_context, kfc);
}
DECLARE_STATIC_KEY_FALSE(rdt_enable_key); DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
/** /**
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/fs_parser.h>
#include <linux/sysfs.h> #include <linux/sysfs.h>
#include <linux/kernfs.h> #include <linux/kernfs.h>
#include <linux/seq_buf.h> #include <linux/seq_buf.h>
...@@ -32,6 +33,7 @@ ...@@ -32,6 +33,7 @@
#include <linux/sched/task.h> #include <linux/sched/task.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/task_work.h> #include <linux/task_work.h>
#include <linux/user_namespace.h>
#include <uapi/linux/magic.h> #include <uapi/linux/magic.h>
...@@ -1858,46 +1860,6 @@ static void cdp_disable_all(void) ...@@ -1858,46 +1860,6 @@ static void cdp_disable_all(void)
cdpl2_disable(); cdpl2_disable();
} }
/*
 * Walk the comma-separated mount option string in @data and switch on the
 * feature each token names:
 *
 *   "cdp"      -> cdpl3_enable()
 *   "cdpl2"    -> cdpl2_enable()
 *   "mba_MBps" -> set_mba_sc(true), Intel CPUs only
 *
 * An empty token, an unknown token, or "mba_MBps" on a non-Intel CPU yields
 * -EINVAL.  Parsing stops at the first token that fails; that token is
 * logged and the error is returned.  Returns 0 when every token succeeded.
 */
static int parse_rdtgroupfs_options(char *data)
{
	char *cursor = data;
	char *token;
	int ret;

	while ((token = strsep(&cursor, ",")) != NULL) {
		if (!strcmp(token, "cdp"))
			ret = cdpl3_enable();
		else if (!strcmp(token, "cdpl2"))
			ret = cdpl2_enable();
		else if (!strcmp(token, "mba_MBps") &&
			 boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
			ret = set_mba_sc(true);
		else
			/* Empty, unknown, or unsupported on this vendor. */
			ret = -EINVAL;

		if (ret) {
			pr_err("Invalid mount option \"%s\"\n", token);
			return ret;
		}
	}

	return 0;
}
/* /*
* We don't allow rdtgroup directories to be created anywhere * We don't allow rdtgroup directories to be created anywhere
* except the root directory. Thus when looking for the rdtgroup * except the root directory. Thus when looking for the rdtgroup
...@@ -1969,13 +1931,27 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, ...@@ -1969,13 +1931,27 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn,
struct rdtgroup *prgrp, struct rdtgroup *prgrp,
struct kernfs_node **mon_data_kn); struct kernfs_node **mon_data_kn);
static struct dentry *rdt_mount(struct file_system_type *fs_type, static int rdt_enable_ctx(struct rdt_fs_context *ctx)
int flags, const char *unused_dev_name,
void *data)
{ {
int ret = 0;
if (ctx->enable_cdpl2)
ret = cdpl2_enable();
if (!ret && ctx->enable_cdpl3)
ret = cdpl3_enable();
if (!ret && ctx->enable_mba_mbps)
ret = set_mba_sc(true);
return ret;
}
static int rdt_get_tree(struct fs_context *fc)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
struct rdt_domain *dom; struct rdt_domain *dom;
struct rdt_resource *r; struct rdt_resource *r;
struct dentry *dentry;
int ret; int ret;
cpus_read_lock(); cpus_read_lock();
...@@ -1984,53 +1960,42 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, ...@@ -1984,53 +1960,42 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
* resctrl file system can only be mounted once. * resctrl file system can only be mounted once.
*/ */
if (static_branch_unlikely(&rdt_enable_key)) { if (static_branch_unlikely(&rdt_enable_key)) {
dentry = ERR_PTR(-EBUSY); ret = -EBUSY;
goto out; goto out;
} }
ret = parse_rdtgroupfs_options(data); ret = rdt_enable_ctx(ctx);
if (ret) { if (ret < 0)
dentry = ERR_PTR(ret);
goto out_cdp; goto out_cdp;
}
closid_init(); closid_init();
ret = rdtgroup_create_info_dir(rdtgroup_default.kn); ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
if (ret) { if (ret < 0)
dentry = ERR_PTR(ret); goto out_mba;
goto out_cdp;
}
if (rdt_mon_capable) { if (rdt_mon_capable) {
ret = mongroup_create_dir(rdtgroup_default.kn, ret = mongroup_create_dir(rdtgroup_default.kn,
NULL, "mon_groups", NULL, "mon_groups",
&kn_mongrp); &kn_mongrp);
if (ret) { if (ret < 0)
dentry = ERR_PTR(ret);
goto out_info; goto out_info;
}
kernfs_get(kn_mongrp); kernfs_get(kn_mongrp);
ret = mkdir_mondata_all(rdtgroup_default.kn, ret = mkdir_mondata_all(rdtgroup_default.kn,
&rdtgroup_default, &kn_mondata); &rdtgroup_default, &kn_mondata);
if (ret) { if (ret < 0)
dentry = ERR_PTR(ret);
goto out_mongrp; goto out_mongrp;
}
kernfs_get(kn_mondata); kernfs_get(kn_mondata);
rdtgroup_default.mon.mon_data_kn = kn_mondata; rdtgroup_default.mon.mon_data_kn = kn_mondata;
} }
ret = rdt_pseudo_lock_init(); ret = rdt_pseudo_lock_init();
if (ret) { if (ret)
dentry = ERR_PTR(ret);
goto out_mondata; goto out_mondata;
}
dentry = kernfs_mount(fs_type, flags, rdt_root, ret = kernfs_get_tree(fc);
RDTGROUP_SUPER_MAGIC, NULL); if (ret < 0)
if (IS_ERR(dentry))
goto out_psl; goto out_psl;
if (rdt_alloc_capable) if (rdt_alloc_capable)
...@@ -2059,14 +2024,95 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, ...@@ -2059,14 +2024,95 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type,
kernfs_remove(kn_mongrp); kernfs_remove(kn_mongrp);
out_info: out_info:
kernfs_remove(kn_info); kernfs_remove(kn_info);
out_mba:
if (ctx->enable_mba_mbps)
set_mba_sc(false);
out_cdp: out_cdp:
cdp_disable_all(); cdp_disable_all();
out: out:
rdt_last_cmd_clear(); rdt_last_cmd_clear();
mutex_unlock(&rdtgroup_mutex); mutex_unlock(&rdtgroup_mutex);
cpus_read_unlock(); cpus_read_unlock();
return ret;
}
/*
 * Option identifiers for the resctrl mount parameters.  These are the
 * values rdt_parse_param() gets back from fs_parse() and switches on.
 */
enum rdt_param {
/* Requests L3 CDP: rdt_parse_param() sets ctx->enable_cdpl3. */
Opt_cdp,
/* Requests L2 CDP: rdt_parse_param() sets ctx->enable_cdpl2. */
Opt_cdpl2,
/*
 * Sets ctx->enable_mba_mbps; rejected with -EINVAL on non-Intel CPUs.
 * NOTE(review): "mpbs" transposes the letters of the "mba_MBps" option
 * name that parse_rdtgroupfs_options() matched - confirm the spelling.
 */
Opt_mba_mpbs,
/* Presumably a count of the options above; not referenced here - confirm. */
nr__rdt_params
};
/*
 * Mount options understood by resctrl, all of them value-less flags.
 *
 * The strings are the user-visible option names, so they have to stay
 * identical to what the string-based parser (parse_rdtgroupfs_options())
 * accepted: "cdp", "cdpl2" and "mba_MBps".  Spelling the last one
 * differently would make existing "-o mba_MBps" mounts fail.
 */
static const struct fs_parameter_spec rdt_param_specs[] = {
	fsparam_flag("cdp",		Opt_cdp),
	fsparam_flag("cdpl2",		Opt_cdpl2),
	fsparam_flag("mba_MBps",	Opt_mba_mpbs),
	{}
};
/*
 * Parameter description for resctrl.  It is handed to fs_parse() by
 * rdt_parse_param() and published through rdt_fs_type.parameters, which
 * register_filesystem() passes to fs_validate_description().
 */
static const struct fs_parameter_description rdt_fs_parameters = {
.name = "rdt",
.specs = rdt_param_specs,
};
return dentry; static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
struct fs_parse_result result;
int opt;
opt = fs_parse(fc, &rdt_fs_parameters, param, &result);
if (opt < 0)
return opt;
switch (opt) {
case Opt_cdp:
ctx->enable_cdpl3 = true;
return 0;
case Opt_cdpl2:
ctx->enable_cdpl2 = true;
return 0;
case Opt_mba_mpbs:
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return -EINVAL;
ctx->enable_mba_mbps = true;
return 0;
}
return -EINVAL;
}
/*
 * ->free() handler: let kernfs drop what it hung off the fs_context, then
 * release the rdt_fs_context allocated by rdt_init_fs_context().
 */
static void rdt_fs_context_free(struct fs_context *fc)
{
	struct rdt_fs_context *rdt_ctx = rdt_fc2context(fc);

	kernfs_free_fs_context(fc);
	kfree(rdt_ctx);
}
/* Context operations that rdt_init_fs_context() installs in fc->ops. */
static const struct fs_context_operations rdt_fs_context_ops = {
/* Frees the kernfs state and the rdt_fs_context itself. */
.free = rdt_fs_context_free,
/* Records one mount option in the rdt_fs_context. */
.parse_param = rdt_parse_param,
/* Applies the recorded options and obtains the superblock. */
.get_tree = rdt_get_tree,
};
static int rdt_init_fs_context(struct fs_context *fc)
{
struct rdt_fs_context *ctx;
ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->kfc.root = rdt_root;
ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
fc->fs_private = &ctx->kfc;
fc->ops = &rdt_fs_context_ops;
if (fc->user_ns)
put_user_ns(fc->user_ns);
fc->user_ns = get_user_ns(&init_user_ns);
fc->global = true;
return 0;
} }
static int reset_all_ctrls(struct rdt_resource *r) static int reset_all_ctrls(struct rdt_resource *r)
...@@ -2240,7 +2286,8 @@ static void rdt_kill_sb(struct super_block *sb) ...@@ -2240,7 +2286,8 @@ static void rdt_kill_sb(struct super_block *sb)
static struct file_system_type rdt_fs_type = { static struct file_system_type rdt_fs_type = {
.name = "resctrl", .name = "resctrl",
.mount = rdt_mount, .init_fs_context = rdt_init_fs_context,
.parameters = &rdt_fs_parameters,
.kill_sb = rdt_kill_sb, .kill_sb = rdt_kill_sb,
}; };
......
...@@ -8,6 +8,13 @@ menu "File systems" ...@@ -8,6 +8,13 @@ menu "File systems"
config DCACHE_WORD_ACCESS config DCACHE_WORD_ACCESS
bool bool
# NOTE(review): register_filesystem() calls fs_validate_description() for any
# filesystem that supplies ->parameters; presumably this option selects
# whether that check does real work - confirm against fs_parser.
config VALIDATE_FS_PARSER
bool "Validate filesystem parameter description"
default y
help
Enable this to perform validation of the parameter description for a
filesystem when it is registered.
if BLOCK if BLOCK
config FS_IOMAP config FS_IOMAP
......
...@@ -12,7 +12,8 @@ obj-y := open.o read_write.o file_table.o super.o \ ...@@ -12,7 +12,8 @@ obj-y := open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \ seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o splice.o sync.o utimes.o d_path.o \ pnode.o splice.o sync.o utimes.o d_path.o \
stack.o fs_struct.o statfs.o fs_pin.o nsfs.o stack.o fs_struct.o statfs.o fs_pin.o nsfs.o \
fs_context.o fs_parser.o
ifeq ($(CONFIG_BLOCK),y) ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o block_dev.o direct-io.o mpage.o obj-y += buffer.o block_dev.o direct-io.o mpage.o
......
...@@ -36,15 +36,14 @@ ...@@ -36,15 +36,14 @@
struct pagevec; struct pagevec;
struct afs_call; struct afs_call;
struct afs_mount_params { struct afs_fs_context {
bool rwpath; /* T if the parent should be considered R/W */
bool force; /* T to force cell type */ bool force; /* T to force cell type */
bool autocell; /* T if set auto mount operation */ bool autocell; /* T if set auto mount operation */
bool dyn_root; /* T if dynamic root */ bool dyn_root; /* T if dynamic root */
bool no_cell; /* T if the source is "none" (for dynroot) */
afs_voltype_t type; /* type of volume requested */ afs_voltype_t type; /* type of volume requested */
int volnamesz; /* size of volume name */ unsigned int volnamesz; /* size of volume name */
const char *volname; /* name of volume to mount */ const char *volname; /* name of volume to mount */
struct net *net_ns; /* Network namespace in effect */
struct afs_net *net; /* the AFS net namespace stuff */ struct afs_net *net; /* the AFS net namespace stuff */
struct afs_cell *cell; /* cell in which to find volume */ struct afs_cell *cell; /* cell in which to find volume */
struct afs_volume *volume; /* volume record */ struct afs_volume *volume; /* volume record */
...@@ -1274,7 +1273,7 @@ static inline struct afs_volume *__afs_get_volume(struct afs_volume *volume) ...@@ -1274,7 +1273,7 @@ static inline struct afs_volume *__afs_get_volume(struct afs_volume *volume)
return volume; return volume;
} }
extern struct afs_volume *afs_create_volume(struct afs_mount_params *); extern struct afs_volume *afs_create_volume(struct afs_fs_context *);
extern void afs_activate_volume(struct afs_volume *); extern void afs_activate_volume(struct afs_volume *);
extern void afs_deactivate_volume(struct afs_volume *); extern void afs_deactivate_volume(struct afs_volume *);
extern void afs_put_volume(struct afs_cell *, struct afs_volume *); extern void afs_put_volume(struct afs_cell *, struct afs_volume *);
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/namei.h> #include <linux/namei.h>
#include <linux/gfp.h> #include <linux/gfp.h>
#include <linux/fs_context.h>
#include "internal.h" #include "internal.h"
...@@ -47,6 +48,8 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out); ...@@ -47,6 +48,8 @@ static DECLARE_DELAYED_WORK(afs_mntpt_expiry_timer, afs_mntpt_expiry_timed_out);
static unsigned long afs_mntpt_expiry_timeout = 10 * 60; static unsigned long afs_mntpt_expiry_timeout = 10 * 60;
static const char afs_root_volume[] = "root.cell";
/* /*
* no valid lookup procedure on this sort of dir * no valid lookup procedure on this sort of dir
*/ */
...@@ -68,108 +71,112 @@ static int afs_mntpt_open(struct inode *inode, struct file *file) ...@@ -68,108 +71,112 @@ static int afs_mntpt_open(struct inode *inode, struct file *file)
} }
/* /*
* create a vfsmount to be automounted * Set the parameters for the proposed superblock.
*/ */
static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
{ {
struct afs_super_info *as; struct afs_fs_context *ctx = fc->fs_private;
struct vfsmount *mnt; struct afs_super_info *src_as = AFS_FS_S(mntpt->d_sb);
struct afs_vnode *vnode; struct afs_vnode *vnode = AFS_FS_I(d_inode(mntpt));
struct page *page; struct afs_cell *cell;
char *devname, *options; const char *p;
bool rwpath = false;
int ret; int ret;
_enter("{%pd}", mntpt); if (fc->net_ns != src_as->net_ns) {
put_net(fc->net_ns);
BUG_ON(!d_inode(mntpt)); fc->net_ns = get_net(src_as->net_ns);
}
ret = -ENOMEM;
devname = (char *) get_zeroed_page(GFP_KERNEL);
if (!devname)
goto error_no_devname;
options = (char *) get_zeroed_page(GFP_KERNEL);
if (!options)
goto error_no_options;
vnode = AFS_FS_I(d_inode(mntpt)); if (src_as->volume && src_as->volume->type == AFSVL_RWVOL) {
ctx->type = AFSVL_RWVOL;
ctx->force = true;
}
if (ctx->cell) {
afs_put_cell(ctx->net, ctx->cell);
ctx->cell = NULL;
}
if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) { if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) {
/* if the directory is a pseudo directory, use the d_name */ /* if the directory is a pseudo directory, use the d_name */
static const char afs_root_cell[] = ":root.cell.";
unsigned size = mntpt->d_name.len; unsigned size = mntpt->d_name.len;
ret = -ENOENT; if (size < 2)
if (size < 2 || size > AFS_MAXCELLNAME) return -ENOENT;
goto error_no_page;
p = mntpt->d_name.name;
if (mntpt->d_name.name[0] == '.') { if (mntpt->d_name.name[0] == '.') {
devname[0] = '%'; size--;
memcpy(devname + 1, mntpt->d_name.name + 1, size - 1); p++;
memcpy(devname + size, afs_root_cell, ctx->type = AFSVL_RWVOL;
sizeof(afs_root_cell)); ctx->force = true;
rwpath = true;
} else {
devname[0] = '#';
memcpy(devname + 1, mntpt->d_name.name, size);
memcpy(devname + size + 1, afs_root_cell,
sizeof(afs_root_cell));
} }
if (size > AFS_MAXCELLNAME)
return -ENAMETOOLONG;
cell = afs_lookup_cell(ctx->net, p, size, NULL, false);
if (IS_ERR(cell)) {
pr_err("kAFS: unable to lookup cell '%pd'\n", mntpt);
return PTR_ERR(cell);
}
ctx->cell = cell;
ctx->volname = afs_root_volume;
ctx->volnamesz = sizeof(afs_root_volume) - 1;
} else { } else {
/* read the contents of the AFS special symlink */ /* read the contents of the AFS special symlink */
struct page *page;
loff_t size = i_size_read(d_inode(mntpt)); loff_t size = i_size_read(d_inode(mntpt));
char *buf; char *buf;
ret = -EINVAL; if (src_as->cell)
ctx->cell = afs_get_cell(src_as->cell);
if (size > PAGE_SIZE - 1) if (size > PAGE_SIZE - 1)
goto error_no_page; return -EINVAL;
page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL); page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL);
if (IS_ERR(page)) { if (IS_ERR(page))
ret = PTR_ERR(page); return PTR_ERR(page);
goto error_no_page;
}
if (PageError(page)) { if (PageError(page)) {
ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt); ret = afs_bad(AFS_FS_I(d_inode(mntpt)), afs_file_error_mntpt);
goto error; put_page(page);
return ret;
} }
buf = kmap_atomic(page); buf = kmap(page);
memcpy(devname, buf, size); ret = vfs_parse_fs_string(fc, "source", buf, size);
kunmap_atomic(buf); kunmap(page);
put_page(page); put_page(page);
page = NULL; if (ret < 0)
return ret;
} }
/* work out what options we want */ return 0;
as = AFS_FS_S(mntpt->d_sb); }
if (as->cell) {
memcpy(options, "cell=", 5);
strcpy(options + 5, as->cell->name);
if ((as->volume && as->volume->type == AFSVL_RWVOL) || rwpath)
strcat(options, ",rwpath");
}
/* try and do the mount */ /*
_debug("--- attempting mount %s -o %s ---", devname, options); * create a vfsmount to be automounted
mnt = vfs_submount(mntpt, &afs_fs_type, devname, options); */
_debug("--- mount result %p ---", mnt); static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
{
struct fs_context *fc;
struct vfsmount *mnt;
int ret;
free_page((unsigned long) devname); BUG_ON(!d_inode(mntpt));
free_page((unsigned long) options);
_leave(" = %p", mnt);
return mnt;
error: fc = fs_context_for_submount(&afs_fs_type, mntpt);
put_page(page); if (IS_ERR(fc))
error_no_page: return ERR_CAST(fc);
free_page((unsigned long) options);
error_no_options: ret = afs_mntpt_set_params(fc, mntpt);
free_page((unsigned long) devname); if (!ret)
error_no_devname: mnt = fc_mount(fc);
_leave(" = %d", ret); else
return ERR_PTR(ret); mnt = ERR_PTR(ret);
put_fs_context(fc);
return mnt;
} }
/* /*
......
This diff is collapsed.
...@@ -21,7 +21,7 @@ static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" }; ...@@ -21,7 +21,7 @@ static const char *const afs_voltypes[] = { "R/W", "R/O", "BAK" };
/* /*
* Allocate a volume record and load it up from a vldb record. * Allocate a volume record and load it up from a vldb record.
*/ */
static struct afs_volume *afs_alloc_volume(struct afs_mount_params *params, static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
struct afs_vldb_entry *vldb, struct afs_vldb_entry *vldb,
unsigned long type_mask) unsigned long type_mask)
{ {
...@@ -113,7 +113,7 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, ...@@ -113,7 +113,7 @@ static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
* - Rule 3: If parent volume is R/W, then only mount R/W volume unless * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
* explicitly told otherwise * explicitly told otherwise
*/ */
struct afs_volume *afs_create_volume(struct afs_mount_params *params) struct afs_volume *afs_create_volume(struct afs_fs_context *params)
{ {
struct afs_vldb_entry *vldb; struct afs_vldb_entry *vldb;
struct afs_volume *volume; struct afs_volume *volume;
......
...@@ -455,6 +455,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent) ...@@ -455,6 +455,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
s->s_blocksize_bits = 10; s->s_blocksize_bits = 10;
s->s_magic = DEVPTS_SUPER_MAGIC; s->s_magic = DEVPTS_SUPER_MAGIC;
s->s_op = &devpts_sops; s->s_op = &devpts_sops;
s->s_d_op = &simple_dentry_operations;
s->s_time_gran = 1; s->s_time_gran = 1;
error = -ENOMEM; error = -ENOMEM;
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/fs_parser.h>
/* /*
* Handling of filesystem drivers list. * Handling of filesystem drivers list.
...@@ -73,6 +74,9 @@ int register_filesystem(struct file_system_type * fs) ...@@ -73,6 +74,9 @@ int register_filesystem(struct file_system_type * fs)
int res = 0; int res = 0;
struct file_system_type ** p; struct file_system_type ** p;
if (fs->parameters && !fs_validate_description(fs->parameters))
return -EINVAL;
BUG_ON(strchr(fs->name, '.')); BUG_ON(strchr(fs->name, '.'));
if (fs->next) if (fs->next)
return -EBUSY; return -EBUSY;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -17,6 +17,7 @@ struct linux_binprm; ...@@ -17,6 +17,7 @@ struct linux_binprm;
struct path; struct path;
struct mount; struct mount;
struct shrink_control; struct shrink_control;
struct fs_context;
/* /*
* block_dev.c * block_dev.c
...@@ -51,9 +52,17 @@ int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied, ...@@ -51,9 +52,17 @@ int __generic_write_end(struct inode *inode, loff_t pos, unsigned copied,
*/ */
extern void __init chrdev_init(void); extern void __init chrdev_init(void);
/*
* fs_context.c
*/
extern int parse_monolithic_mount_data(struct fs_context *, void *);
extern void fc_drop_locked(struct fs_context *);
/* /*
* namei.c * namei.c
*/ */
extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
struct path *path, struct path *root);
extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *, extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *); const char *, unsigned int, struct path *);
...@@ -99,10 +108,8 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *); ...@@ -99,10 +108,8 @@ extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
/* /*
* super.c * super.c
*/ */
extern int do_remount_sb(struct super_block *, int, void *, int); extern int reconfigure_super(struct fs_context *);
extern bool trylock_super(struct super_block *sb); extern bool trylock_super(struct super_block *sb);
extern struct dentry *mount_fs(struct file_system_type *,
int, const char *, void *);
extern struct super_block *user_get_super(dev_t); extern struct super_block *user_get_super(dev_t);
/* /*
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <linux/xattr.h> #include <linux/xattr.h>
#include <linux/kernfs.h> #include <linux/kernfs.h>
#include <linux/fs_context.h>
struct kernfs_iattrs { struct kernfs_iattrs {
struct iattr ia_iattr; struct iattr ia_iattr;
......
...@@ -22,16 +22,6 @@ ...@@ -22,16 +22,6 @@
struct kmem_cache *kernfs_node_cache; struct kmem_cache *kernfs_node_cache;
/*
 * super_operations ->remount_fs: forward the request to the owning
 * kernfs_root's syscall_ops->remount_fs() hook.  Returns 0 when the root
 * has no syscall_ops or no remount hook.
 */
static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data)
{
	struct kernfs_root *root = kernfs_info(sb)->root;
	struct kernfs_syscall_ops *ops = root->syscall_ops;

	if (!ops || !ops->remount_fs)
		return 0;

	return ops->remount_fs(root, flags, data);
}
static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
{ {
struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry)); struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry));
...@@ -60,7 +50,6 @@ const struct super_operations kernfs_sops = { ...@@ -60,7 +50,6 @@ const struct super_operations kernfs_sops = {
.drop_inode = generic_delete_inode, .drop_inode = generic_delete_inode,
.evict_inode = kernfs_evict_inode, .evict_inode = kernfs_evict_inode,
.remount_fs = kernfs_sop_remount_fs,
.show_options = kernfs_sop_show_options, .show_options = kernfs_sop_show_options,
.show_path = kernfs_sop_show_path, .show_path = kernfs_sop_show_path,
}; };
...@@ -222,7 +211,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, ...@@ -222,7 +211,7 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
} while (true); } while (true);
} }
static int kernfs_fill_super(struct super_block *sb, unsigned long magic) static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc)
{ {
struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_super_info *info = kernfs_info(sb);
struct inode *inode; struct inode *inode;
...@@ -233,7 +222,7 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic) ...@@ -233,7 +222,7 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
sb->s_blocksize = PAGE_SIZE; sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT; sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = magic; sb->s_magic = kfc->magic;
sb->s_op = &kernfs_sops; sb->s_op = &kernfs_sops;
sb->s_xattr = kernfs_xattr_handlers; sb->s_xattr = kernfs_xattr_handlers;
if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP) if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP)
...@@ -263,21 +252,20 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic) ...@@ -263,21 +252,20 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
return 0; return 0;
} }
static int kernfs_test_super(struct super_block *sb, void *data) static int kernfs_test_super(struct super_block *sb, struct fs_context *fc)
{ {
struct kernfs_super_info *sb_info = kernfs_info(sb); struct kernfs_super_info *sb_info = kernfs_info(sb);
struct kernfs_super_info *info = data; struct kernfs_super_info *info = fc->s_fs_info;
return sb_info->root == info->root && sb_info->ns == info->ns; return sb_info->root == info->root && sb_info->ns == info->ns;
} }
static int kernfs_set_super(struct super_block *sb, void *data) static int kernfs_set_super(struct super_block *sb, struct fs_context *fc)
{ {
int error; struct kernfs_fs_context *kfc = fc->fs_private;
error = set_anon_super(sb, data);
if (!error) kfc->ns_tag = NULL;
sb->s_fs_info = data; return set_anon_super_fc(sb, fc);
return error;
} }
/** /**
...@@ -294,63 +282,60 @@ const void *kernfs_super_ns(struct super_block *sb) ...@@ -294,63 +282,60 @@ const void *kernfs_super_ns(struct super_block *sb)
} }
/** /**
* kernfs_mount_ns - kernfs mount helper * kernfs_get_tree - kernfs filesystem access/retrieval helper
* @fs_type: file_system_type of the fs being mounted * @fc: The filesystem context.
* @flags: mount flags specified for the mount
* @root: kernfs_root of the hierarchy being mounted
* @magic: file system specific magic number
* @new_sb_created: tell the caller if we allocated a new superblock
* @ns: optional namespace tag of the mount
*
* This is to be called from each kernfs user's file_system_type->mount()
* implementation, which should pass through the specified @fs_type and
* @flags, and specify the hierarchy and namespace tag to mount via @root
* and @ns, respectively.
* *
* The return value can be passed to the vfs layer verbatim. * This is to be called from each kernfs user's fs_context->ops->get_tree()
* implementation, which should set the specified ->@fs_type and ->@flags, and
* specify the hierarchy and namespace tag to mount via ->@root and ->@ns,
* respectively.
*/ */
struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, int kernfs_get_tree(struct fs_context *fc)
struct kernfs_root *root, unsigned long magic,
bool *new_sb_created, const void *ns)
{ {
struct kernfs_fs_context *kfc = fc->fs_private;
struct super_block *sb; struct super_block *sb;
struct kernfs_super_info *info; struct kernfs_super_info *info;
int error; int error;
info = kzalloc(sizeof(*info), GFP_KERNEL); info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info) if (!info)
return ERR_PTR(-ENOMEM); return -ENOMEM;
info->root = root; info->root = kfc->root;
info->ns = ns; info->ns = kfc->ns_tag;
INIT_LIST_HEAD(&info->node); INIT_LIST_HEAD(&info->node);
sb = sget_userns(fs_type, kernfs_test_super, kernfs_set_super, flags, fc->s_fs_info = info;
&init_user_ns, info); sb = sget_fc(fc, kernfs_test_super, kernfs_set_super);
if (IS_ERR(sb) || sb->s_fs_info != info)
kfree(info);
if (IS_ERR(sb)) if (IS_ERR(sb))
return ERR_CAST(sb); return PTR_ERR(sb);
if (new_sb_created)
*new_sb_created = !sb->s_root;
if (!sb->s_root) { if (!sb->s_root) {
struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_super_info *info = kernfs_info(sb);
error = kernfs_fill_super(sb, magic); kfc->new_sb_created = true;
error = kernfs_fill_super(sb, kfc);
if (error) { if (error) {
deactivate_locked_super(sb); deactivate_locked_super(sb);
return ERR_PTR(error); return error;
} }
sb->s_flags |= SB_ACTIVE; sb->s_flags |= SB_ACTIVE;
mutex_lock(&kernfs_mutex); mutex_lock(&kernfs_mutex);
list_add(&info->node, &root->supers); list_add(&info->node, &info->root->supers);
mutex_unlock(&kernfs_mutex); mutex_unlock(&kernfs_mutex);
} }
return dget(sb->s_root); fc->root = dget(sb->s_root);
return 0;
}
/*
 * Free whatever is still stored in fc->s_fs_info (kernfs_get_tree() parks
 * its kernfs_super_info there) and clear the pointer.
 */
void kernfs_free_fs_context(struct fs_context *fc)
{
	void *leftover = fc->s_fs_info;

	/* Note that we don't deal with kfc->ns_tag here. */
	fc->s_fs_info = NULL;
	kfree(leftover);
}
/** /**
...@@ -377,36 +362,6 @@ void kernfs_kill_sb(struct super_block *sb) ...@@ -377,36 +362,6 @@ void kernfs_kill_sb(struct super_block *sb)
kfree(info); kfree(info);
} }
/**
 * kernfs_pin_sb: try to pin the superblock associated with a kernfs_root
 * @root: the kernfs_root in question
 * @ns: the namespace tag
 *
 * Take an active reference on the superblock of @root whose namespace tag
 * is @ns so that it won't be destroyed in subsequent operations.  This can
 * be used to block ->kill_sb() which may be useful for kernfs users which
 * dynamically manage superblocks.
 *
 * Returns the pinned superblock, NULL if there's no superblock for @ns
 * associated to this kernfs_root, or ERR_PTR(-EINVAL) if the superblock is
 * being freed (its s_active count has already dropped to zero).
 */
struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns)
{
	struct super_block *pinned = NULL;
	struct kernfs_super_info *info;

	mutex_lock(&kernfs_mutex);
	list_for_each_entry(info, &root->supers, node) {
		if (info->ns != ns)
			continue;

		/* Only the first superblock matching @ns is considered. */
		if (atomic_inc_not_zero(&info->sb->s_active))
			pinned = info->sb;
		else
			pinned = ERR_PTR(-EINVAL);
		break;
	}
	mutex_unlock(&kernfs_mutex);

	return pinned;
}
void __init kernfs_init(void) void __init kernfs_init(void)
{ {
......
...@@ -146,3 +146,8 @@ static inline bool is_local_mountpoint(struct dentry *dentry) ...@@ -146,3 +146,8 @@ static inline bool is_local_mountpoint(struct dentry *dentry)
return __is_local_mountpoint(dentry); return __is_local_mountpoint(dentry);
} }
/* A mount namespace counts as anonymous when its sequence number is zero. */
static inline bool is_anon_ns(struct mnt_namespace *ns)
{
	return !ns->seq;
}
...@@ -2333,7 +2333,7 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path ...@@ -2333,7 +2333,7 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
return err; return err;
} }
static int filename_lookup(int dfd, struct filename *name, unsigned flags, int filename_lookup(int dfd, struct filename *name, unsigned flags,
struct path *path, struct path *root) struct path *path, struct path *root)
{ {
int retval; int retval;
......
This diff is collapsed.
...@@ -261,11 +261,8 @@ int orangefs_getattr(const struct path *path, struct kstat *stat, ...@@ -261,11 +261,8 @@ int orangefs_getattr(const struct path *path, struct kstat *stat,
generic_fillattr(inode, stat); generic_fillattr(inode, stat);
/* override block size reported to stat */ /* override block size reported to stat */
if (request_mask & STATX_SIZE) if (!(request_mask & STATX_SIZE))
stat->result_mask = STATX_BASIC_STATS; stat->result_mask &= ~STATX_SIZE;
else
stat->result_mask = STATX_BASIC_STATS &
~STATX_SIZE;
stat->attributes_mask = STATX_ATTR_IMMUTABLE | stat->attributes_mask = STATX_ATTR_IMMUTABLE |
STATX_ATTR_APPEND; STATX_ATTR_APPEND;
......
...@@ -226,8 +226,15 @@ void generic_pipe_buf_release(struct pipe_inode_info *pipe, ...@@ -226,8 +226,15 @@ void generic_pipe_buf_release(struct pipe_inode_info *pipe,
} }
EXPORT_SYMBOL(generic_pipe_buf_release); EXPORT_SYMBOL(generic_pipe_buf_release);
/* New data written to a pipe may be appended to a buffer with this type. */
static const struct pipe_buf_operations anon_pipe_buf_ops = { static const struct pipe_buf_operations anon_pipe_buf_ops = {
.can_merge = 1, .confirm = generic_pipe_buf_confirm,
.release = anon_pipe_buf_release,
.steal = anon_pipe_buf_steal,
.get = generic_pipe_buf_get,
};
static const struct pipe_buf_operations anon_pipe_buf_nomerge_ops = {
.confirm = generic_pipe_buf_confirm, .confirm = generic_pipe_buf_confirm,
.release = anon_pipe_buf_release, .release = anon_pipe_buf_release,
.steal = anon_pipe_buf_steal, .steal = anon_pipe_buf_steal,
...@@ -235,13 +242,32 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = { ...@@ -235,13 +242,32 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
}; };
static const struct pipe_buf_operations packet_pipe_buf_ops = { static const struct pipe_buf_operations packet_pipe_buf_ops = {
.can_merge = 0,
.confirm = generic_pipe_buf_confirm, .confirm = generic_pipe_buf_confirm,
.release = anon_pipe_buf_release, .release = anon_pipe_buf_release,
.steal = anon_pipe_buf_steal, .steal = anon_pipe_buf_steal,
.get = generic_pipe_buf_get, .get = generic_pipe_buf_get,
}; };
/**
* pipe_buf_mark_unmergeable - mark a &struct pipe_buffer as unmergeable
* @buf: the buffer to mark
*
* Description:
* This function ensures that no future writes will be merged into the
* given &struct pipe_buffer. This is necessary when multiple pipe buffers
* share the same backing page.
*/
void pipe_buf_mark_unmergeable(struct pipe_buffer *buf)
{
if (buf->ops == &anon_pipe_buf_ops)
buf->ops = &anon_pipe_buf_nomerge_ops;
}
static bool pipe_buf_can_merge(struct pipe_buffer *buf)
{
return buf->ops == &anon_pipe_buf_ops;
}
static ssize_t static ssize_t
pipe_read(struct kiocb *iocb, struct iov_iter *to) pipe_read(struct kiocb *iocb, struct iov_iter *to)
{ {
...@@ -379,7 +405,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -379,7 +405,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
struct pipe_buffer *buf = pipe->bufs + lastbuf; struct pipe_buffer *buf = pipe->bufs + lastbuf;
int offset = buf->offset + buf->len; int offset = buf->offset + buf->len;
if (buf->ops->can_merge && offset + chars <= PAGE_SIZE) { if (pipe_buf_can_merge(buf) && offset + chars <= PAGE_SIZE) {
ret = pipe_buf_confirm(pipe, buf); ret = pipe_buf_confirm(pipe, buf);
if (ret) if (ret)
goto out; goto out;
......
...@@ -214,7 +214,6 @@ static struct mount *next_group(struct mount *m, struct mount *origin) ...@@ -214,7 +214,6 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
} }
/* all accesses are serialized by namespace_sem */ /* all accesses are serialized by namespace_sem */
static struct user_namespace *user_ns;
static struct mount *last_dest, *first_source, *last_source, *dest_master; static struct mount *last_dest, *first_source, *last_source, *dest_master;
static struct mountpoint *mp; static struct mountpoint *mp;
static struct hlist_head *list; static struct hlist_head *list;
...@@ -260,9 +259,6 @@ static int propagate_one(struct mount *m) ...@@ -260,9 +259,6 @@ static int propagate_one(struct mount *m)
type |= CL_MAKE_SHARED; type |= CL_MAKE_SHARED;
} }
/* Notice when we are propagating across user namespaces */
if (m->mnt_ns->user_ns != user_ns)
type |= CL_UNPRIVILEGED;
child = copy_tree(last_source, last_source->mnt.mnt_root, type); child = copy_tree(last_source, last_source->mnt.mnt_root, type);
if (IS_ERR(child)) if (IS_ERR(child))
return PTR_ERR(child); return PTR_ERR(child);
...@@ -303,7 +299,6 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, ...@@ -303,7 +299,6 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
* propagate_one(); everything is serialized by namespace_sem, * propagate_one(); everything is serialized by namespace_sem,
* so globals will do just fine. * so globals will do just fine.
*/ */
user_ns = current->nsproxy->mnt_ns->user_ns;
last_dest = dest_mnt; last_dest = dest_mnt;
first_source = source_mnt; first_source = source_mnt;
last_source = source_mnt; last_source = source_mnt;
......
...@@ -27,8 +27,7 @@ ...@@ -27,8 +27,7 @@
#define CL_MAKE_SHARED 0x08 #define CL_MAKE_SHARED 0x08
#define CL_PRIVATE 0x10 #define CL_PRIVATE 0x10
#define CL_SHARED_TO_SLAVE 0x20 #define CL_SHARED_TO_SLAVE 0x20
#define CL_UNPRIVILEGED 0x40 #define CL_COPY_MNT_NS_FILE 0x40
#define CL_COPY_MNT_NS_FILE 0x80
#define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE) #define CL_COPY_ALL (CL_COPY_UNBINDABLE | CL_COPY_MNT_NS_FILE)
......
...@@ -24,7 +24,6 @@ ...@@ -24,7 +24,6 @@
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/magic.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
...@@ -122,13 +121,12 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root) ...@@ -122,13 +121,12 @@ static int proc_show_options(struct seq_file *seq, struct dentry *root)
return 0; return 0;
} }
static const struct super_operations proc_sops = { const struct super_operations proc_sops = {
.alloc_inode = proc_alloc_inode, .alloc_inode = proc_alloc_inode,
.destroy_inode = proc_destroy_inode, .destroy_inode = proc_destroy_inode,
.drop_inode = generic_delete_inode, .drop_inode = generic_delete_inode,
.evict_inode = proc_evict_inode, .evict_inode = proc_evict_inode,
.statfs = simple_statfs, .statfs = simple_statfs,
.remount_fs = proc_remount,
.show_options = proc_show_options, .show_options = proc_show_options,
}; };
...@@ -488,51 +486,3 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) ...@@ -488,51 +486,3 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
pde_put(de); pde_put(de);
return inode; return inode;
} }
int proc_fill_super(struct super_block *s, void *data, int silent)
{
struct pid_namespace *ns = get_pid_ns(s->s_fs_info);
struct inode *root_inode;
int ret;
if (!proc_parse_options(data, ns))
return -EINVAL;
/* User space would break if executables or devices appear on proc */
s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC;
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
s->s_magic = PROC_SUPER_MAGIC;
s->s_op = &proc_sops;
s->s_time_gran = 1;
/*
* procfs isn't actually a stacking filesystem; however, there is
* too much magic going on inside it to permit stacking things on
* top of it
*/
s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
/* procfs dentries and inodes don't require IO to create */
s->s_shrink.seeks = 0;
pde_get(&proc_root);
root_inode = proc_get_inode(s, &proc_root);
if (!root_inode) {
pr_err("proc_fill_super: get root inode failed\n");
return -ENOMEM;
}
s->s_root = d_make_root(root_inode);
if (!s->s_root) {
pr_err("proc_fill_super: allocate dentry failed\n");
return -ENOMEM;
}
ret = proc_setup_self(s);
if (ret) {
return ret;
}
return proc_setup_thread_self(s);
}
...@@ -205,13 +205,12 @@ struct pde_opener { ...@@ -205,13 +205,12 @@ struct pde_opener {
struct completion *c; struct completion *c;
} __randomize_layout; } __randomize_layout;
extern const struct inode_operations proc_link_inode_operations; extern const struct inode_operations proc_link_inode_operations;
extern const struct inode_operations proc_pid_link_inode_operations; extern const struct inode_operations proc_pid_link_inode_operations;
extern const struct super_operations proc_sops;
void proc_init_kmemcache(void); void proc_init_kmemcache(void);
void set_proc_pid_nlink(void); void set_proc_pid_nlink(void);
extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
extern int proc_fill_super(struct super_block *, void *data, int flags);
extern void proc_entry_rundown(struct proc_dir_entry *); extern void proc_entry_rundown(struct proc_dir_entry *);
/* /*
...@@ -269,10 +268,8 @@ static inline void proc_tty_init(void) {} ...@@ -269,10 +268,8 @@ static inline void proc_tty_init(void) {}
* root.c * root.c
*/ */
extern struct proc_dir_entry proc_root; extern struct proc_dir_entry proc_root;
extern int proc_parse_options(char *options, struct pid_namespace *pid);
extern void proc_self_init(void); extern void proc_self_init(void);
extern int proc_remount(struct super_block *, int *, char *);
/* /*
* task_[no]mmu.c * task_[no]mmu.c
......
...@@ -19,86 +19,178 @@ ...@@ -19,86 +19,178 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/bitops.h> #include <linux/bitops.h>
#include <linux/user_namespace.h> #include <linux/user_namespace.h>
#include <linux/fs_context.h>
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/pid_namespace.h> #include <linux/pid_namespace.h>
#include <linux/parser.h> #include <linux/fs_parser.h>
#include <linux/cred.h> #include <linux/cred.h>
#include <linux/magic.h>
#include <linux/slab.h>
#include "internal.h" #include "internal.h"
enum { struct proc_fs_context {
Opt_gid, Opt_hidepid, Opt_err, struct pid_namespace *pid_ns;
unsigned int mask;
int hidepid;
int gid;
}; };
static const match_table_t tokens = { enum proc_param {
{Opt_hidepid, "hidepid=%u"}, Opt_gid,
{Opt_gid, "gid=%u"}, Opt_hidepid,
{Opt_err, NULL},
}; };
int proc_parse_options(char *options, struct pid_namespace *pid) static const struct fs_parameter_spec proc_param_specs[] = {
{ fsparam_u32("gid", Opt_gid),
char *p; fsparam_u32("hidepid", Opt_hidepid),
substring_t args[MAX_OPT_ARGS]; {}
int option; };
static const struct fs_parameter_description proc_fs_parameters = {
.name = "proc",
.specs = proc_param_specs,
};
if (!options) static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
return 1; {
struct proc_fs_context *ctx = fc->fs_private;
struct fs_parse_result result;
int opt;
while ((p = strsep(&options, ",")) != NULL) { opt = fs_parse(fc, &proc_fs_parameters, param, &result);
int token; if (opt < 0)
if (!*p) return opt;
continue;
args[0].to = args[0].from = NULL; switch (opt) {
token = match_token(p, tokens, args);
switch (token) {
case Opt_gid: case Opt_gid:
if (match_int(&args[0], &option)) ctx->gid = result.uint_32;
return 0;
pid->pid_gid = make_kgid(current_user_ns(), option);
break; break;
case Opt_hidepid: case Opt_hidepid:
if (match_int(&args[0], &option)) ctx->hidepid = result.uint_32;
return 0; if (ctx->hidepid < HIDEPID_OFF ||
if (option < HIDEPID_OFF || ctx->hidepid > HIDEPID_INVISIBLE)
option > HIDEPID_INVISIBLE) { return invalf(fc, "proc: hidepid value must be between 0 and 2.\n");
pr_err("proc: hidepid value must be between 0 and 2.\n");
return 0;
}
pid->hide_pid = option;
break; break;
default: default:
pr_err("proc: unrecognized mount option \"%s\" " return -EINVAL;
"or missing value\n", p); }
ctx->mask |= 1 << opt;
return 0; return 0;
}
static void proc_apply_options(struct super_block *s,
struct fs_context *fc,
struct pid_namespace *pid_ns,
struct user_namespace *user_ns)
{
struct proc_fs_context *ctx = fc->fs_private;
if (ctx->mask & (1 << Opt_gid))
pid_ns->pid_gid = make_kgid(user_ns, ctx->gid);
if (ctx->mask & (1 << Opt_hidepid))
pid_ns->hide_pid = ctx->hidepid;
}
static int proc_fill_super(struct super_block *s, struct fs_context *fc)
{
struct pid_namespace *pid_ns = get_pid_ns(s->s_fs_info);
struct inode *root_inode;
int ret;
proc_apply_options(s, fc, pid_ns, current_user_ns());
/* User space would break if executables or devices appear on proc */
s->s_iflags |= SB_I_USERNS_VISIBLE | SB_I_NOEXEC | SB_I_NODEV;
s->s_flags |= SB_NODIRATIME | SB_NOSUID | SB_NOEXEC;
s->s_blocksize = 1024;
s->s_blocksize_bits = 10;
s->s_magic = PROC_SUPER_MAGIC;
s->s_op = &proc_sops;
s->s_time_gran = 1;
/*
* procfs isn't actually a stacking filesystem; however, there is
* too much magic going on inside it to permit stacking things on
* top of it
*/
s->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
/* procfs dentries and inodes don't require IO to create */
s->s_shrink.seeks = 0;
pde_get(&proc_root);
root_inode = proc_get_inode(s, &proc_root);
if (!root_inode) {
pr_err("proc_fill_super: get root inode failed\n");
return -ENOMEM;
} }
s->s_root = d_make_root(root_inode);
if (!s->s_root) {
pr_err("proc_fill_super: allocate dentry failed\n");
return -ENOMEM;
} }
return 1; ret = proc_setup_self(s);
if (ret) {
return ret;
}
return proc_setup_thread_self(s);
} }
int proc_remount(struct super_block *sb, int *flags, char *data) static int proc_reconfigure(struct fs_context *fc)
{ {
struct super_block *sb = fc->root->d_sb;
struct pid_namespace *pid = sb->s_fs_info; struct pid_namespace *pid = sb->s_fs_info;
sync_filesystem(sb); sync_filesystem(sb);
return !proc_parse_options(data, pid);
proc_apply_options(sb, fc, pid, current_user_ns());
return 0;
} }
static struct dentry *proc_mount(struct file_system_type *fs_type, static int proc_get_tree(struct fs_context *fc)
int flags, const char *dev_name, void *data)
{ {
struct pid_namespace *ns; struct proc_fs_context *ctx = fc->fs_private;
if (flags & SB_KERNMOUNT) { put_user_ns(fc->user_ns);
ns = data; fc->user_ns = get_user_ns(ctx->pid_ns->user_ns);
data = NULL; fc->s_fs_info = ctx->pid_ns;
} else { return vfs_get_super(fc, vfs_get_keyed_super, proc_fill_super);
ns = task_active_pid_ns(current); }
}
static void proc_fs_context_free(struct fs_context *fc)
{
struct proc_fs_context *ctx = fc->fs_private;
return mount_ns(fs_type, flags, data, ns, ns->user_ns, proc_fill_super); if (ctx->pid_ns)
put_pid_ns(ctx->pid_ns);
kfree(ctx);
}
static const struct fs_context_operations proc_fs_context_ops = {
.free = proc_fs_context_free,
.parse_param = proc_parse_param,
.get_tree = proc_get_tree,
.reconfigure = proc_reconfigure,
};
static int proc_init_fs_context(struct fs_context *fc)
{
struct proc_fs_context *ctx;
ctx = kzalloc(sizeof(struct proc_fs_context), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx->pid_ns = get_pid_ns(task_active_pid_ns(current));
fc->fs_private = ctx;
fc->ops = &proc_fs_context_ops;
return 0;
} }
static void proc_kill_sb(struct super_block *sb) static void proc_kill_sb(struct super_block *sb)
...@@ -116,7 +208,8 @@ static void proc_kill_sb(struct super_block *sb) ...@@ -116,7 +208,8 @@ static void proc_kill_sb(struct super_block *sb)
static struct file_system_type proc_fs_type = { static struct file_system_type proc_fs_type = {
.name = "proc", .name = "proc",
.mount = proc_mount, .init_fs_context = proc_init_fs_context,
.parameters = &proc_fs_parameters,
.kill_sb = proc_kill_sb, .kill_sb = proc_kill_sb,
.fs_flags = FS_USERNS_MOUNT, .fs_flags = FS_USERNS_MOUNT,
}; };
...@@ -209,9 +302,28 @@ struct proc_dir_entry proc_root = { ...@@ -209,9 +302,28 @@ struct proc_dir_entry proc_root = {
int pid_ns_prepare_proc(struct pid_namespace *ns) int pid_ns_prepare_proc(struct pid_namespace *ns)
{ {
struct proc_fs_context *ctx;
struct fs_context *fc;
struct vfsmount *mnt; struct vfsmount *mnt;
mnt = kern_mount_data(&proc_fs_type, ns); fc = fs_context_for_mount(&proc_fs_type, SB_KERNMOUNT);
if (IS_ERR(fc))
return PTR_ERR(fc);
if (fc->user_ns != ns->user_ns) {
put_user_ns(fc->user_ns);
fc->user_ns = get_user_ns(ns->user_ns);
}
ctx = fc->fs_private;
if (ctx->pid_ns != ns) {
put_pid_ns(ctx->pid_ns);
get_pid_ns(ns);
ctx->pid_ns = ns;
}
mnt = fc_mount(fc);
put_fs_context(fc);
if (IS_ERR(mnt)) if (IS_ERR(mnt))
return PTR_ERR(mnt); return PTR_ERR(mnt);
......
...@@ -478,8 +478,8 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t ...@@ -478,8 +478,8 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t
return ret; return ret;
} }
ssize_t __vfs_write(struct file *file, const char __user *p, size_t count, static ssize_t __vfs_write(struct file *file, const char __user *p,
loff_t *pos) size_t count, loff_t *pos)
{ {
if (file->f_op->write) if (file->f_op->write)
return file->f_op->write(file, p, count, pos); return file->f_op->write(file, p, count, pos);
...@@ -1238,6 +1238,9 @@ COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd, ...@@ -1238,6 +1238,9 @@ COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
const struct compat_iovec __user *,vec, const struct compat_iovec __user *,vec,
unsigned long, vlen, loff_t, pos, rwf_t, flags) unsigned long, vlen, loff_t, pos, rwf_t, flags)
{ {
if (pos == -1)
return do_compat_readv(fd, vec, vlen, flags);
return do_compat_preadv64(fd, vec, vlen, pos, flags); return do_compat_preadv64(fd, vec, vlen, pos, flags);
} }
#endif #endif
...@@ -1344,6 +1347,9 @@ COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd, ...@@ -1344,6 +1347,9 @@ COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
const struct compat_iovec __user *,vec, const struct compat_iovec __user *,vec,
unsigned long, vlen, loff_t, pos, rwf_t, flags) unsigned long, vlen, loff_t, pos, rwf_t, flags)
{ {
if (pos == -1)
return do_compat_writev(fd, vec, vlen, flags);
return do_compat_pwritev64(fd, vec, vlen, pos, flags); return do_compat_pwritev64(fd, vec, vlen, pos, flags);
} }
#endif #endif
......
...@@ -138,7 +138,6 @@ static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe, ...@@ -138,7 +138,6 @@ static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
} }
const struct pipe_buf_operations page_cache_pipe_buf_ops = { const struct pipe_buf_operations page_cache_pipe_buf_ops = {
.can_merge = 0,
.confirm = page_cache_pipe_buf_confirm, .confirm = page_cache_pipe_buf_confirm,
.release = page_cache_pipe_buf_release, .release = page_cache_pipe_buf_release,
.steal = page_cache_pipe_buf_steal, .steal = page_cache_pipe_buf_steal,
...@@ -156,7 +155,6 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe, ...@@ -156,7 +155,6 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
} }
static const struct pipe_buf_operations user_page_pipe_buf_ops = { static const struct pipe_buf_operations user_page_pipe_buf_ops = {
.can_merge = 0,
.confirm = generic_pipe_buf_confirm, .confirm = generic_pipe_buf_confirm,
.release = page_cache_pipe_buf_release, .release = page_cache_pipe_buf_release,
.steal = user_page_pipe_buf_steal, .steal = user_page_pipe_buf_steal,
...@@ -326,7 +324,6 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, ...@@ -326,7 +324,6 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
EXPORT_SYMBOL(generic_file_splice_read); EXPORT_SYMBOL(generic_file_splice_read);
const struct pipe_buf_operations default_pipe_buf_ops = { const struct pipe_buf_operations default_pipe_buf_ops = {
.can_merge = 0,
.confirm = generic_pipe_buf_confirm, .confirm = generic_pipe_buf_confirm,
.release = generic_pipe_buf_release, .release = generic_pipe_buf_release,
.steal = generic_pipe_buf_steal, .steal = generic_pipe_buf_steal,
...@@ -341,7 +338,6 @@ static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, ...@@ -341,7 +338,6 @@ static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
/* Pipe buffer operations for a socket and similar. */ /* Pipe buffer operations for a socket and similar. */
const struct pipe_buf_operations nosteal_pipe_buf_ops = { const struct pipe_buf_operations nosteal_pipe_buf_ops = {
.can_merge = 0,
.confirm = generic_pipe_buf_confirm, .confirm = generic_pipe_buf_confirm,
.release = generic_pipe_buf_release, .release = generic_pipe_buf_release,
.steal = generic_pipe_buf_nosteal, .steal = generic_pipe_buf_nosteal,
...@@ -1597,6 +1593,8 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, ...@@ -1597,6 +1593,8 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
*/ */
obuf->flags &= ~PIPE_BUF_FLAG_GIFT; obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
pipe_buf_mark_unmergeable(obuf);
obuf->len = len; obuf->len = len;
opipe->nrbufs++; opipe->nrbufs++;
ibuf->offset += obuf->len; ibuf->offset += obuf->len;
...@@ -1671,6 +1669,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, ...@@ -1671,6 +1669,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
*/ */
obuf->flags &= ~PIPE_BUF_FLAG_GIFT; obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
pipe_buf_mark_unmergeable(obuf);
if (obuf->len > len) if (obuf->len > len)
obuf->len = len; obuf->len = len;
......
...@@ -45,11 +45,6 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) ...@@ -45,11 +45,6 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
stat->ctime = inode->i_ctime; stat->ctime = inode->i_ctime;
stat->blksize = i_blocksize(inode); stat->blksize = i_blocksize(inode);
stat->blocks = inode->i_blocks; stat->blocks = inode->i_blocks;
if (IS_NOATIME(inode))
stat->result_mask &= ~STATX_ATIME;
if (IS_AUTOMOUNT(inode))
stat->attributes |= STATX_ATTR_AUTOMOUNT;
} }
EXPORT_SYMBOL(generic_fillattr); EXPORT_SYMBOL(generic_fillattr);
...@@ -75,6 +70,13 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, ...@@ -75,6 +70,13 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
stat->result_mask |= STATX_BASIC_STATS; stat->result_mask |= STATX_BASIC_STATS;
request_mask &= STATX_ALL; request_mask &= STATX_ALL;
query_flags &= KSTAT_QUERY_FLAGS; query_flags &= KSTAT_QUERY_FLAGS;
/* allow the fs to override these if it really wants to */
if (IS_NOATIME(inode))
stat->result_mask &= ~STATX_ATIME;
if (IS_AUTOMOUNT(inode))
stat->attributes |= STATX_ATTR_AUTOMOUNT;
if (inode->i_op->getattr) if (inode->i_op->getattr)
return inode->i_op->getattr(path, stat, request_mask, return inode->i_op->getattr(path, stat, request_mask,
query_flags); query_flags);
......
This diff is collapsed.
...@@ -13,34 +13,69 @@ ...@@ -13,34 +13,69 @@
#include <linux/magic.h> #include <linux/magic.h>
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/slab.h>
#include <linux/user_namespace.h> #include <linux/user_namespace.h>
#include <linux/fs_context.h>
#include <net/net_namespace.h>
#include "sysfs.h" #include "sysfs.h"
static struct kernfs_root *sysfs_root; static struct kernfs_root *sysfs_root;
struct kernfs_node *sysfs_root_kn; struct kernfs_node *sysfs_root_kn;
static struct dentry *sysfs_mount(struct file_system_type *fs_type, static int sysfs_get_tree(struct fs_context *fc)
int flags, const char *dev_name, void *data)
{ {
struct dentry *root; struct kernfs_fs_context *kfc = fc->fs_private;
void *ns; int ret;
bool new_sb = false;
if (!(flags & SB_KERNMOUNT)) { ret = kernfs_get_tree(fc);
if (ret)
return ret;
if (kfc->new_sb_created)
fc->root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
return 0;
}
static void sysfs_fs_context_free(struct fs_context *fc)
{
struct kernfs_fs_context *kfc = fc->fs_private;
if (kfc->ns_tag)
kobj_ns_drop(KOBJ_NS_TYPE_NET, kfc->ns_tag);
kernfs_free_fs_context(fc);
kfree(kfc);
}
static const struct fs_context_operations sysfs_fs_context_ops = {
.free = sysfs_fs_context_free,
.get_tree = sysfs_get_tree,
};
static int sysfs_init_fs_context(struct fs_context *fc)
{
struct kernfs_fs_context *kfc;
struct net *netns;
if (!(fc->sb_flags & SB_KERNMOUNT)) {
if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET)) if (!kobj_ns_current_may_mount(KOBJ_NS_TYPE_NET))
return ERR_PTR(-EPERM); return -EPERM;
} }
ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET); kfc = kzalloc(sizeof(struct kernfs_fs_context), GFP_KERNEL);
root = kernfs_mount_ns(fs_type, flags, sysfs_root, if (!kfc)
SYSFS_MAGIC, &new_sb, ns); return -ENOMEM;
if (!new_sb)
kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
else if (!IS_ERR(root))
root->d_sb->s_iflags |= SB_I_USERNS_VISIBLE;
return root; kfc->ns_tag = netns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
kfc->root = sysfs_root;
kfc->magic = SYSFS_MAGIC;
fc->fs_private = kfc;
fc->ops = &sysfs_fs_context_ops;
if (fc->user_ns)
put_user_ns(fc->user_ns);
fc->user_ns = get_user_ns(netns->user_ns);
fc->global = true;
return 0;
} }
static void sysfs_kill_sb(struct super_block *sb) static void sysfs_kill_sb(struct super_block *sb)
...@@ -53,7 +88,7 @@ static void sysfs_kill_sb(struct super_block *sb) ...@@ -53,7 +88,7 @@ static void sysfs_kill_sb(struct super_block *sb)
static struct file_system_type sysfs_fs_type = { static struct file_system_type sysfs_fs_type = {
.name = "sysfs", .name = "sysfs",
.mount = sysfs_mount, .init_fs_context = sysfs_init_fs_context,
.kill_sb = sysfs_kill_sb, .kill_sb = sysfs_kill_sb,
.fs_flags = FS_USERNS_MOUNT, .fs_flags = FS_USERNS_MOUNT,
}; };
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */
#define EPROBE_DEFER 517 /* Driver requests probe retry */ #define EPROBE_DEFER 517 /* Driver requests probe retry */
#define EOPENSTALE 518 /* open found a stale dentry */ #define EOPENSTALE 518 /* open found a stale dentry */
#define ENOPARAM 519 /* Parameter not supported */
/* Defined for the NFSv3 protocol */ /* Defined for the NFSv3 protocol */
#define EBADHANDLE 521 /* Illegal NFS file handle */ #define EBADHANDLE 521 /* Illegal NFS file handle */
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -76,6 +76,22 @@ ...@@ -76,6 +76,22 @@
* changes on the process such as clearing out non-inheritable signal * changes on the process such as clearing out non-inheritable signal
* state. This is called immediately after commit_creds(). * state. This is called immediately after commit_creds().
* *
* Security hooks for mount using fs_context.
* [See also Documentation/filesystems/mounting.txt]
*
* @fs_context_dup:
* Allocate and attach a security structure to fc->security. This pointer
* is initialised to NULL by the caller.
* @fc indicates the new filesystem context.
* @src_fc indicates the original filesystem context.
* @fs_context_parse_param:
* Userspace provided a parameter to configure a superblock. The LSM may
* reject it with an error and may use it for itself, in which case it
* should return 0; otherwise it should return -ENOPARAM to pass it on to
* the filesystem.
* @fc indicates the filesystem context.
* @param The parameter
*
* Security hooks for filesystem operations. * Security hooks for filesystem operations.
* *
* @sb_alloc_security: * @sb_alloc_security:
...@@ -1459,6 +1475,9 @@ union security_list_options { ...@@ -1459,6 +1475,9 @@ union security_list_options {
void (*bprm_committing_creds)(struct linux_binprm *bprm); void (*bprm_committing_creds)(struct linux_binprm *bprm);
void (*bprm_committed_creds)(struct linux_binprm *bprm); void (*bprm_committed_creds)(struct linux_binprm *bprm);
int (*fs_context_dup)(struct fs_context *fc, struct fs_context *src_sc);
int (*fs_context_parse_param)(struct fs_context *fc, struct fs_parameter *param);
int (*sb_alloc_security)(struct super_block *sb); int (*sb_alloc_security)(struct super_block *sb);
void (*sb_free_security)(struct super_block *sb); void (*sb_free_security)(struct super_block *sb);
void (*sb_free_mnt_opts)(void *mnt_opts); void (*sb_free_mnt_opts)(void *mnt_opts);
...@@ -1800,6 +1819,8 @@ struct security_hook_heads { ...@@ -1800,6 +1819,8 @@ struct security_hook_heads {
struct hlist_head bprm_check_security; struct hlist_head bprm_check_security;
struct hlist_head bprm_committing_creds; struct hlist_head bprm_committing_creds;
struct hlist_head bprm_committed_creds; struct hlist_head bprm_committed_creds;
struct hlist_head fs_context_dup;
struct hlist_head fs_context_parse_param;
struct hlist_head sb_alloc_security; struct hlist_head sb_alloc_security;
struct hlist_head sb_free_security; struct hlist_head sb_free_security;
struct hlist_head sb_free_mnt_opts; struct hlist_head sb_free_mnt_opts;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -42,7 +42,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, ...@@ -42,7 +42,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
goto fail; goto fail;
err = -ENOMEM; err = -ENOMEM;
ns = kmalloc(sizeof(struct ipc_namespace), GFP_KERNEL); ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL);
if (ns == NULL) if (ns == NULL)
goto fail_dec; goto fail_dec;
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.