Commit f03c6599 authored by Al Viro's avatar Al Viro

sanitize vfsmount refcounting changes

Instead of splitting refcount between (per-cpu) mnt_count
and (SMP-only) mnt_longrefs, make all references contribute
to mnt_count again and keep track of how many are longterm
ones.

Accounting rules for longterm count:
	* 1 for each fs_struct.root.mnt
	* 1 for each fs_struct.pwd.mnt
	* 1 for having non-NULL ->mnt_ns
	* decrement to 0 happens only under vfsmount lock exclusive

That allows nice common case for mntput() - since we can't drop the
final reference until after mnt_longterm has reached 0 due to the rules
above, mntput() can grab vfsmount lock shared and check mnt_longterm.
If it turns out to be non-zero (which is the common case), we know
that this is not the final mntput() and can just blindly decrement
percpu mnt_count.  Otherwise we grab vfsmount lock exclusive and
do usual decrement-and-check of percpu mnt_count.

For fs_struct.c we have mnt_make_longterm() and mnt_make_shortterm();
namespace.c uses the latter in places where we don't already hold
vfsmount lock exclusive and opencodes a few remaining spots where
we need to manipulate mnt_longterm.

Note that we mostly revert the code outside of fs/namespace.c back
to what we used to have; in particular, normal code doesn't need
to care about two kinds of references, etc.  And we get to keep
the optimization Nick's variant had bought us...
Signed-off-by: default avatarAl Viro <viro@zeniv.linux.org.uk>
parent 7b8a53fd
...@@ -1201,7 +1201,7 @@ static int __init init_mtdchar(void) ...@@ -1201,7 +1201,7 @@ static int __init init_mtdchar(void)
static void __exit cleanup_mtdchar(void) static void __exit cleanup_mtdchar(void)
{ {
unregister_mtd_user(&mtdchar_notifier); unregister_mtd_user(&mtdchar_notifier);
mntput_long(mtd_inode_mnt); mntput(mtd_inode_mnt);
unregister_filesystem(&mtd_inodefs_type); unregister_filesystem(&mtd_inodefs_type);
__unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd"); __unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd");
} }
......
...@@ -233,7 +233,7 @@ static int __init anon_inode_init(void) ...@@ -233,7 +233,7 @@ static int __init anon_inode_init(void)
return 0; return 0;
err_mntput: err_mntput:
mntput_long(anon_inode_mnt); mntput(anon_inode_mnt);
err_unregister_filesystem: err_unregister_filesystem:
unregister_filesystem(&anon_inode_fs_type); unregister_filesystem(&anon_inode_fs_type);
err_exit: err_exit:
......
...@@ -4,6 +4,19 @@ ...@@ -4,6 +4,19 @@
#include <linux/path.h> #include <linux/path.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/fs_struct.h> #include <linux/fs_struct.h>
#include "internal.h"
static inline void path_get_longterm(struct path *path)
{
path_get(path);
mnt_make_longterm(path->mnt);
}
static inline void path_put_longterm(struct path *path)
{
mnt_make_shortterm(path->mnt);
path_put(path);
}
/* /*
* Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values.
...@@ -17,11 +30,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) ...@@ -17,11 +30,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
write_seqcount_begin(&fs->seq); write_seqcount_begin(&fs->seq);
old_root = fs->root; old_root = fs->root;
fs->root = *path; fs->root = *path;
path_get_long(path); path_get_longterm(path);
write_seqcount_end(&fs->seq); write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock); spin_unlock(&fs->lock);
if (old_root.dentry) if (old_root.dentry)
path_put_long(&old_root); path_put_longterm(&old_root);
} }
/* /*
...@@ -36,12 +49,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) ...@@ -36,12 +49,12 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
write_seqcount_begin(&fs->seq); write_seqcount_begin(&fs->seq);
old_pwd = fs->pwd; old_pwd = fs->pwd;
fs->pwd = *path; fs->pwd = *path;
path_get_long(path); path_get_longterm(path);
write_seqcount_end(&fs->seq); write_seqcount_end(&fs->seq);
spin_unlock(&fs->lock); spin_unlock(&fs->lock);
if (old_pwd.dentry) if (old_pwd.dentry)
path_put_long(&old_pwd); path_put_longterm(&old_pwd);
} }
void chroot_fs_refs(struct path *old_root, struct path *new_root) void chroot_fs_refs(struct path *old_root, struct path *new_root)
...@@ -59,13 +72,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) ...@@ -59,13 +72,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
write_seqcount_begin(&fs->seq); write_seqcount_begin(&fs->seq);
if (fs->root.dentry == old_root->dentry if (fs->root.dentry == old_root->dentry
&& fs->root.mnt == old_root->mnt) { && fs->root.mnt == old_root->mnt) {
path_get_long(new_root); path_get_longterm(new_root);
fs->root = *new_root; fs->root = *new_root;
count++; count++;
} }
if (fs->pwd.dentry == old_root->dentry if (fs->pwd.dentry == old_root->dentry
&& fs->pwd.mnt == old_root->mnt) { && fs->pwd.mnt == old_root->mnt) {
path_get_long(new_root); path_get_longterm(new_root);
fs->pwd = *new_root; fs->pwd = *new_root;
count++; count++;
} }
...@@ -76,13 +89,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) ...@@ -76,13 +89,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
} while_each_thread(g, p); } while_each_thread(g, p);
read_unlock(&tasklist_lock); read_unlock(&tasklist_lock);
while (count--) while (count--)
path_put_long(old_root); path_put_longterm(old_root);
} }
void free_fs_struct(struct fs_struct *fs) void free_fs_struct(struct fs_struct *fs)
{ {
path_put_long(&fs->root); path_put_longterm(&fs->root);
path_put_long(&fs->pwd); path_put_longterm(&fs->pwd);
kmem_cache_free(fs_cachep, fs); kmem_cache_free(fs_cachep, fs);
} }
...@@ -118,9 +131,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) ...@@ -118,9 +131,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
spin_lock(&old->lock); spin_lock(&old->lock);
fs->root = old->root; fs->root = old->root;
path_get_long(&fs->root); path_get_longterm(&fs->root);
fs->pwd = old->pwd; fs->pwd = old->pwd;
path_get_long(&fs->pwd); path_get_longterm(&fs->pwd);
spin_unlock(&old->lock); spin_unlock(&old->lock);
} }
return fs; return fs;
......
...@@ -73,6 +73,9 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); ...@@ -73,6 +73,9 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
extern int do_add_mount(struct vfsmount *, struct path *, int); extern int do_add_mount(struct vfsmount *, struct path *, int);
extern void mnt_clear_expiry(struct vfsmount *); extern void mnt_clear_expiry(struct vfsmount *);
extern void mnt_make_longterm(struct vfsmount *);
extern void mnt_make_shortterm(struct vfsmount *);
extern void __init mnt_init(void); extern void __init mnt_init(void);
DECLARE_BRLOCK(vfsmount_lock); DECLARE_BRLOCK(vfsmount_lock);
......
...@@ -367,18 +367,6 @@ void path_get(struct path *path) ...@@ -367,18 +367,6 @@ void path_get(struct path *path)
} }
EXPORT_SYMBOL(path_get); EXPORT_SYMBOL(path_get);
/**
* path_get_long - get a long reference to a path
* @path: path to get the reference to
*
* Given a path increment the reference count to the dentry and the vfsmount.
*/
void path_get_long(struct path *path)
{
mntget_long(path->mnt);
dget(path->dentry);
}
/** /**
* path_put - put a reference to a path * path_put - put a reference to a path
* @path: path to put the reference to * @path: path to put the reference to
...@@ -392,18 +380,6 @@ void path_put(struct path *path) ...@@ -392,18 +380,6 @@ void path_put(struct path *path)
} }
EXPORT_SYMBOL(path_put); EXPORT_SYMBOL(path_put);
/**
* path_put_long - put a long reference to a path
* @path: path to put the reference to
*
* Given a path decrement the reference count to the dentry and the vfsmount.
*/
void path_put_long(struct path *path)
{
dput(path->dentry);
mntput_long(path->mnt);
}
/** /**
* nameidata_drop_rcu - drop this nameidata out of rcu-walk * nameidata_drop_rcu - drop this nameidata out of rcu-walk
* @nd: nameidata pathwalk data to drop * @nd: nameidata pathwalk data to drop
......
...@@ -183,7 +183,7 @@ static inline void mnt_dec_count(struct vfsmount *mnt) ...@@ -183,7 +183,7 @@ static inline void mnt_dec_count(struct vfsmount *mnt)
unsigned int mnt_get_count(struct vfsmount *mnt) unsigned int mnt_get_count(struct vfsmount *mnt)
{ {
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
unsigned int count = atomic_read(&mnt->mnt_longrefs); unsigned int count = 0;
int cpu; int cpu;
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
...@@ -217,7 +217,7 @@ struct vfsmount *alloc_vfsmnt(const char *name) ...@@ -217,7 +217,7 @@ struct vfsmount *alloc_vfsmnt(const char *name)
if (!mnt->mnt_pcp) if (!mnt->mnt_pcp)
goto out_free_devname; goto out_free_devname;
atomic_set(&mnt->mnt_longrefs, 1); this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
#else #else
mnt->mnt_count = 1; mnt->mnt_count = 1;
mnt->mnt_writers = 0; mnt->mnt_writers = 0;
...@@ -624,8 +624,11 @@ static void commit_tree(struct vfsmount *mnt) ...@@ -624,8 +624,11 @@ static void commit_tree(struct vfsmount *mnt)
BUG_ON(parent == mnt); BUG_ON(parent == mnt);
list_add_tail(&head, &mnt->mnt_list); list_add_tail(&head, &mnt->mnt_list);
list_for_each_entry(m, &head, mnt_list) list_for_each_entry(m, &head, mnt_list) {
m->mnt_ns = n; m->mnt_ns = n;
atomic_inc(&m->mnt_longterm);
}
list_splice(&head, n->list.prev); list_splice(&head, n->list.prev);
list_add_tail(&mnt->mnt_hash, mount_hashtable + list_add_tail(&mnt->mnt_hash, mount_hashtable +
...@@ -734,51 +737,30 @@ static inline void mntfree(struct vfsmount *mnt) ...@@ -734,51 +737,30 @@ static inline void mntfree(struct vfsmount *mnt)
deactivate_super(sb); deactivate_super(sb);
} }
#ifdef CONFIG_SMP static void mntput_no_expire(struct vfsmount *mnt)
static inline void __mntput(struct vfsmount *mnt, int longrefs)
{ {
if (!longrefs) {
put_again: put_again:
br_read_lock(vfsmount_lock); #ifdef CONFIG_SMP
if (likely(atomic_read(&mnt->mnt_longrefs))) { br_read_lock(vfsmount_lock);
mnt_dec_count(mnt); if (likely(atomic_read(&mnt->mnt_longterm))) {
br_read_unlock(vfsmount_lock); mnt_dec_count(mnt);
return;
}
br_read_unlock(vfsmount_lock); br_read_unlock(vfsmount_lock);
} else { return;
BUG_ON(!atomic_read(&mnt->mnt_longrefs));
if (atomic_add_unless(&mnt->mnt_longrefs, -1, 1))
return;
} }
br_read_unlock(vfsmount_lock);
br_write_lock(vfsmount_lock); br_write_lock(vfsmount_lock);
if (!longrefs) mnt_dec_count(mnt);
mnt_dec_count(mnt);
else
atomic_dec(&mnt->mnt_longrefs);
if (mnt_get_count(mnt)) { if (mnt_get_count(mnt)) {
br_write_unlock(vfsmount_lock); br_write_unlock(vfsmount_lock);
return; return;
} }
if (unlikely(mnt->mnt_pinned)) {
mnt_add_count(mnt, mnt->mnt_pinned + 1);
mnt->mnt_pinned = 0;
br_write_unlock(vfsmount_lock);
acct_auto_close_mnt(mnt);
goto put_again;
}
br_write_unlock(vfsmount_lock);
mntfree(mnt);
}
#else #else
static inline void __mntput(struct vfsmount *mnt, int longrefs)
{
put_again:
mnt_dec_count(mnt); mnt_dec_count(mnt);
if (likely(mnt_get_count(mnt))) if (likely(mnt_get_count(mnt)))
return; return;
br_write_lock(vfsmount_lock); br_write_lock(vfsmount_lock);
#endif
if (unlikely(mnt->mnt_pinned)) { if (unlikely(mnt->mnt_pinned)) {
mnt_add_count(mnt, mnt->mnt_pinned + 1); mnt_add_count(mnt, mnt->mnt_pinned + 1);
mnt->mnt_pinned = 0; mnt->mnt_pinned = 0;
...@@ -789,12 +771,6 @@ static inline void __mntput(struct vfsmount *mnt, int longrefs) ...@@ -789,12 +771,6 @@ static inline void __mntput(struct vfsmount *mnt, int longrefs)
br_write_unlock(vfsmount_lock); br_write_unlock(vfsmount_lock);
mntfree(mnt); mntfree(mnt);
} }
#endif
static void mntput_no_expire(struct vfsmount *mnt)
{
__mntput(mnt, 0);
}
void mntput(struct vfsmount *mnt) void mntput(struct vfsmount *mnt)
{ {
...@@ -802,7 +778,7 @@ void mntput(struct vfsmount *mnt) ...@@ -802,7 +778,7 @@ void mntput(struct vfsmount *mnt)
/* avoid cacheline pingpong, hope gcc doesn't get "smart" */ /* avoid cacheline pingpong, hope gcc doesn't get "smart" */
if (unlikely(mnt->mnt_expiry_mark)) if (unlikely(mnt->mnt_expiry_mark))
mnt->mnt_expiry_mark = 0; mnt->mnt_expiry_mark = 0;
__mntput(mnt, 0); mntput_no_expire(mnt);
} }
} }
EXPORT_SYMBOL(mntput); EXPORT_SYMBOL(mntput);
...@@ -815,33 +791,6 @@ struct vfsmount *mntget(struct vfsmount *mnt) ...@@ -815,33 +791,6 @@ struct vfsmount *mntget(struct vfsmount *mnt)
} }
EXPORT_SYMBOL(mntget); EXPORT_SYMBOL(mntget);
void mntput_long(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
if (mnt) {
/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
if (unlikely(mnt->mnt_expiry_mark))
mnt->mnt_expiry_mark = 0;
__mntput(mnt, 1);
}
#else
mntput(mnt);
#endif
}
EXPORT_SYMBOL(mntput_long);
struct vfsmount *mntget_long(struct vfsmount *mnt)
{
#ifdef CONFIG_SMP
if (mnt)
atomic_inc(&mnt->mnt_longrefs);
return mnt;
#else
return mntget(mnt);
#endif
}
EXPORT_SYMBOL(mntget_long);
void mnt_pin(struct vfsmount *mnt) void mnt_pin(struct vfsmount *mnt)
{ {
br_write_lock(vfsmount_lock); br_write_lock(vfsmount_lock);
...@@ -1216,7 +1165,7 @@ void release_mounts(struct list_head *head) ...@@ -1216,7 +1165,7 @@ void release_mounts(struct list_head *head)
dput(dentry); dput(dentry);
mntput(m); mntput(m);
} }
mntput_long(mnt); mntput(mnt);
} }
} }
...@@ -1240,6 +1189,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) ...@@ -1240,6 +1189,7 @@ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
list_del_init(&p->mnt_list); list_del_init(&p->mnt_list);
__touch_mnt_namespace(p->mnt_ns); __touch_mnt_namespace(p->mnt_ns);
p->mnt_ns = NULL; p->mnt_ns = NULL;
atomic_dec(&p->mnt_longterm);
list_del_init(&p->mnt_child); list_del_init(&p->mnt_child);
if (p->mnt_parent != p) { if (p->mnt_parent != p) {
p->mnt_parent->mnt_ghosts++; p->mnt_parent->mnt_ghosts++;
...@@ -1969,7 +1919,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags) ...@@ -1969,7 +1919,7 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags)
unlock: unlock:
up_write(&namespace_sem); up_write(&namespace_sem);
mntput_long(newmnt); mntput(newmnt);
return err; return err;
} }
...@@ -2291,6 +2241,20 @@ static struct mnt_namespace *alloc_mnt_ns(void) ...@@ -2291,6 +2241,20 @@ static struct mnt_namespace *alloc_mnt_ns(void)
return new_ns; return new_ns;
} }
void mnt_make_longterm(struct vfsmount *mnt)
{
atomic_inc(&mnt->mnt_longterm);
}
void mnt_make_shortterm(struct vfsmount *mnt)
{
if (atomic_add_unless(&mnt->mnt_longterm, -1, 1))
return;
br_write_lock(vfsmount_lock);
atomic_dec(&mnt->mnt_longterm);
br_write_unlock(vfsmount_lock);
}
/* /*
* Allocate a new namespace structure and populate it with contents * Allocate a new namespace structure and populate it with contents
* copied from the namespace of the passed in task structure. * copied from the namespace of the passed in task structure.
...@@ -2328,14 +2292,19 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, ...@@ -2328,14 +2292,19 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
q = new_ns->root; q = new_ns->root;
while (p) { while (p) {
q->mnt_ns = new_ns; q->mnt_ns = new_ns;
atomic_inc(&q->mnt_longterm);
if (fs) { if (fs) {
if (p == fs->root.mnt) { if (p == fs->root.mnt) {
fs->root.mnt = mntget(q);
atomic_inc(&q->mnt_longterm);
mnt_make_shortterm(p);
rootmnt = p; rootmnt = p;
fs->root.mnt = mntget_long(q);
} }
if (p == fs->pwd.mnt) { if (p == fs->pwd.mnt) {
fs->pwd.mnt = mntget(q);
atomic_inc(&q->mnt_longterm);
mnt_make_shortterm(p);
pwdmnt = p; pwdmnt = p;
fs->pwd.mnt = mntget_long(q);
} }
} }
p = next_mnt(p, mnt_ns->root); p = next_mnt(p, mnt_ns->root);
...@@ -2344,9 +2313,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, ...@@ -2344,9 +2313,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
up_write(&namespace_sem); up_write(&namespace_sem);
if (rootmnt) if (rootmnt)
mntput_long(rootmnt); mntput(rootmnt);
if (pwdmnt) if (pwdmnt)
mntput_long(pwdmnt); mntput(pwdmnt);
return new_ns; return new_ns;
} }
...@@ -2379,6 +2348,7 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt) ...@@ -2379,6 +2348,7 @@ struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
new_ns = alloc_mnt_ns(); new_ns = alloc_mnt_ns();
if (!IS_ERR(new_ns)) { if (!IS_ERR(new_ns)) {
mnt->mnt_ns = new_ns; mnt->mnt_ns = new_ns;
atomic_inc(&mnt->mnt_longterm);
new_ns->root = mnt; new_ns->root = mnt;
list_add(&new_ns->list, &new_ns->root->mnt_list); list_add(&new_ns->list, &new_ns->root->mnt_list);
} }
......
...@@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void) ...@@ -1292,7 +1292,7 @@ static int __init init_pipe_fs(void)
static void __exit exit_pipe_fs(void) static void __exit exit_pipe_fs(void)
{ {
unregister_filesystem(&pipe_fs_type); unregister_filesystem(&pipe_fs_type);
mntput_long(pipe_mnt); mntput(pipe_mnt);
} }
fs_initcall(init_pipe_fs); fs_initcall(init_pipe_fs);
......
...@@ -1141,7 +1141,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) ...@@ -1141,7 +1141,7 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
return mnt; return mnt;
err: err:
mntput_long(mnt); mntput(mnt);
return ERR_PTR(err); return ERR_PTR(err);
} }
......
...@@ -60,7 +60,7 @@ struct vfsmount { ...@@ -60,7 +60,7 @@ struct vfsmount {
struct super_block *mnt_sb; /* pointer to superblock */ struct super_block *mnt_sb; /* pointer to superblock */
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
struct mnt_pcp __percpu *mnt_pcp; struct mnt_pcp __percpu *mnt_pcp;
atomic_t mnt_longrefs; atomic_t mnt_longterm; /* how many of the refs are longterm */
#else #else
int mnt_count; int mnt_count;
int mnt_writers; int mnt_writers;
...@@ -96,8 +96,6 @@ extern int mnt_clone_write(struct vfsmount *mnt); ...@@ -96,8 +96,6 @@ extern int mnt_clone_write(struct vfsmount *mnt);
extern void mnt_drop_write(struct vfsmount *mnt); extern void mnt_drop_write(struct vfsmount *mnt);
extern void mntput(struct vfsmount *mnt); extern void mntput(struct vfsmount *mnt);
extern struct vfsmount *mntget(struct vfsmount *mnt); extern struct vfsmount *mntget(struct vfsmount *mnt);
extern void mntput_long(struct vfsmount *mnt);
extern struct vfsmount *mntget_long(struct vfsmount *mnt);
extern void mnt_pin(struct vfsmount *mnt); extern void mnt_pin(struct vfsmount *mnt);
extern void mnt_unpin(struct vfsmount *mnt); extern void mnt_unpin(struct vfsmount *mnt);
extern int __mnt_is_readonly(struct vfsmount *mnt); extern int __mnt_is_readonly(struct vfsmount *mnt);
......
...@@ -10,9 +10,7 @@ struct path { ...@@ -10,9 +10,7 @@ struct path {
}; };
extern void path_get(struct path *); extern void path_get(struct path *);
extern void path_get_long(struct path *);
extern void path_put(struct path *); extern void path_put(struct path *);
extern void path_put_long(struct path *);
static inline int path_equal(const struct path *path1, const struct path *path2) static inline int path_equal(const struct path *path1, const struct path *path2)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment