Commit 9c577491 authored by Linus Torvalds

Merge branch 'work.dotdot1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs pathwalk sanitizing from Al Viro:
 "Massive pathwalk rewrite and cleanups.

  Several iterations have been posted; hopefully this thing is getting
  readable and understandable now. Pretty much all parts of pathname
  resolutions are affected...

  The branch is identical to what has sat in -next, except for commit
  message in "lift all calls of step_into() out of follow_dotdot/
  follow_dotdot_rcu", crediting Qian Cai for reporting the bug; only
  commit message changed there."

* 'work.dotdot1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (69 commits)
  lookup_open(): don't bother with fallbacks to lookup+create
  atomic_open(): no need to pass struct open_flags anymore
  open_last_lookups(): move complete_walk() into do_open()
  open_last_lookups(): lift O_EXCL|O_CREAT handling into do_open()
  open_last_lookups(): don't abuse complete_walk() when all we want is unlazy
  open_last_lookups(): consolidate fsnotify_create() calls
  take post-lookup part of do_last() out of loop
  link_path_walk(): sample parent's i_uid and i_mode for the last component
  __nd_alloc_stack(): make it return bool
  reserve_stack(): switch to __nd_alloc_stack()
  pick_link(): take reserving space on stack into a new helper
  pick_link(): more straightforward handling of allocation failures
  fold path_to_nameidata() into its only remaining caller
  pick_link(): pass it struct path already with normal refcounting rules
  fs/namei.c: kill follow_mount()
  non-RCU analogue of the previous commit
  helper for mount rootwards traversal
  follow_dotdot(): be lazy about changing nd->path
  follow_dotdot_rcu(): be lazy about changing nd->path
  follow_dotdot{,_rcu}(): massage loops
  ...
parents d987ca1c 99a4a90c
...@@ -404,11 +404,8 @@ that is the "next" component in the pathname. ...@@ -404,11 +404,8 @@ that is the "next" component in the pathname.
``int last_type`` ``int last_type``
~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~
This is one of ``LAST_NORM``, ``LAST_ROOT``, ``LAST_DOT``, ``LAST_DOTDOT``, or This is one of ``LAST_NORM``, ``LAST_ROOT``, ``LAST_DOT`` or ``LAST_DOTDOT``.
``LAST_BIND``. The ``last`` field is only valid if the type is The ``last`` field is only valid if the type is ``LAST_NORM``.
``LAST_NORM``. ``LAST_BIND`` is used when following a symlink and no
components of the symlink have been processed yet. Others should be
fairly self-explanatory.
``struct path root`` ``struct path root``
~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~
......
...@@ -186,7 +186,7 @@ static int find_autofs_mount(const char *pathname, ...@@ -186,7 +186,7 @@ static int find_autofs_mount(const char *pathname,
struct path path; struct path path;
int err; int err;
err = kern_path_mountpoint(AT_FDCWD, pathname, &path, 0); err = kern_path(pathname, LOOKUP_MOUNTPOINT, &path);
if (err) if (err)
return err; return err;
err = -ENOENT; err = -ENOENT;
...@@ -519,8 +519,8 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp, ...@@ -519,8 +519,8 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
if (!fp || param->ioctlfd == -1) { if (!fp || param->ioctlfd == -1) {
if (autofs_type_any(type)) if (autofs_type_any(type))
err = kern_path_mountpoint(AT_FDCWD, err = kern_path(name, LOOKUP_FOLLOW | LOOKUP_MOUNTPOINT,
name, &path, LOOKUP_FOLLOW); &path);
else else
err = find_autofs_mount(name, &path, err = find_autofs_mount(name, &path,
test_by_type, &type); test_by_type, &type);
......
...@@ -60,7 +60,6 @@ extern int finish_clean_context(struct fs_context *fc); ...@@ -60,7 +60,6 @@ extern int finish_clean_context(struct fs_context *fc);
*/ */
extern int filename_lookup(int dfd, struct filename *name, unsigned flags, extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
struct path *path, struct path *root); struct path *path, struct path *root);
extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *, extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *); const char *, unsigned int, struct path *);
long do_mknodat(int dfd, const char __user *filename, umode_t mode, long do_mknodat(int dfd, const char __user *filename, umode_t mode,
......
...@@ -503,9 +503,10 @@ struct nameidata { ...@@ -503,9 +503,10 @@ struct nameidata {
} *stack, internal[EMBEDDED_LEVELS]; } *stack, internal[EMBEDDED_LEVELS];
struct filename *name; struct filename *name;
struct nameidata *saved; struct nameidata *saved;
struct inode *link_inode;
unsigned root_seq; unsigned root_seq;
int dfd; int dfd;
kuid_t dir_uid;
umode_t dir_mode;
} __randomize_layout; } __randomize_layout;
static void set_nameidata(struct nameidata *p, int dfd, struct filename *name) static void set_nameidata(struct nameidata *p, int dfd, struct filename *name)
...@@ -530,52 +531,34 @@ static void restore_nameidata(void) ...@@ -530,52 +531,34 @@ static void restore_nameidata(void)
kfree(now->stack); kfree(now->stack);
} }
static int __nd_alloc_stack(struct nameidata *nd) static bool nd_alloc_stack(struct nameidata *nd)
{ {
struct saved *p; struct saved *p;
if (nd->flags & LOOKUP_RCU) {
p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved),
GFP_ATOMIC);
if (unlikely(!p))
return -ECHILD;
} else {
p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved), p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved),
GFP_KERNEL); nd->flags & LOOKUP_RCU ? GFP_ATOMIC : GFP_KERNEL);
if (unlikely(!p)) if (unlikely(!p))
return -ENOMEM; return false;
}
memcpy(p, nd->internal, sizeof(nd->internal)); memcpy(p, nd->internal, sizeof(nd->internal));
nd->stack = p; nd->stack = p;
return 0; return true;
} }
/** /**
* path_connected - Verify that a path->dentry is below path->mnt.mnt_root * path_connected - Verify that a dentry is below mnt.mnt_root
* @path: nameidate to verify
* *
* Rename can sometimes move a file or directory outside of a bind * Rename can sometimes move a file or directory outside of a bind
* mount, path_connected allows those cases to be detected. * mount, path_connected allows those cases to be detected.
*/ */
static bool path_connected(const struct path *path) static bool path_connected(struct vfsmount *mnt, struct dentry *dentry)
{ {
struct vfsmount *mnt = path->mnt;
struct super_block *sb = mnt->mnt_sb; struct super_block *sb = mnt->mnt_sb;
/* Bind mounts and multi-root filesystems can have disconnected paths */ /* Bind mounts and multi-root filesystems can have disconnected paths */
if (!(sb->s_iflags & SB_I_MULTIROOT) && (mnt->mnt_root == sb->s_root)) if (!(sb->s_iflags & SB_I_MULTIROOT) && (mnt->mnt_root == sb->s_root))
return true; return true;
return is_subdir(path->dentry, mnt->mnt_root); return is_subdir(dentry, mnt->mnt_root);
}
static inline int nd_alloc_stack(struct nameidata *nd)
{
if (likely(nd->depth != EMBEDDED_LEVELS))
return 0;
if (likely(nd->stack != nd->internal))
return 0;
return __nd_alloc_stack(nd);
} }
static void drop_links(struct nameidata *nd) static void drop_links(struct nameidata *nd)
...@@ -608,10 +591,9 @@ static void terminate_walk(struct nameidata *nd) ...@@ -608,10 +591,9 @@ static void terminate_walk(struct nameidata *nd)
} }
/* path_put is needed afterwards regardless of success or failure */ /* path_put is needed afterwards regardless of success or failure */
static bool legitimize_path(struct nameidata *nd, static bool __legitimize_path(struct path *path, unsigned seq, unsigned mseq)
struct path *path, unsigned seq)
{ {
int res = __legitimize_mnt(path->mnt, nd->m_seq); int res = __legitimize_mnt(path->mnt, mseq);
if (unlikely(res)) { if (unlikely(res)) {
if (res > 0) if (res > 0)
path->mnt = NULL; path->mnt = NULL;
...@@ -625,6 +607,12 @@ static bool legitimize_path(struct nameidata *nd, ...@@ -625,6 +607,12 @@ static bool legitimize_path(struct nameidata *nd,
return !read_seqcount_retry(&path->dentry->d_seq, seq); return !read_seqcount_retry(&path->dentry->d_seq, seq);
} }
/*
 * legitimize_path - legitimize @path against the sequence numbers of a walk
 * @nd:   walk state; supplies the mount_lock sequence sampled in nd->m_seq
 * @path: path to legitimize
 * @seq:  ->d_seq value sampled for @path->dentry
 *
 * Thin wrapper around __legitimize_path(), whose parameters are
 * (path, dentry seq, mount seq) in that order.  The dentry sequence must be
 * passed before nd->m_seq; swapping them makes the mount be checked against
 * the dentry's seqcount and vice versa.
 *
 * Returns whatever __legitimize_path() returns.  path_put() is needed
 * afterwards regardless of success or failure.
 */
static inline bool legitimize_path(struct nameidata *nd,
				   struct path *path, unsigned seq)
{
	return __legitimize_path(path, seq, nd->m_seq);
}
static bool legitimize_links(struct nameidata *nd) static bool legitimize_links(struct nameidata *nd)
{ {
int i; int i;
...@@ -858,25 +846,6 @@ static int set_root(struct nameidata *nd) ...@@ -858,25 +846,6 @@ static int set_root(struct nameidata *nd)
return 0; return 0;
} }
/*
 * Drop the dentry reference held in @path; drop its mount reference too,
 * unless @path shares its vfsmount with nd->path.
 */
static void path_put_conditional(struct path *path, struct nameidata *nd)
{
	dput(path->dentry);

	if (path->mnt == nd->path.mnt)
		return;
	mntput(path->mnt);
}
static inline void path_to_nameidata(const struct path *path,
struct nameidata *nd)
{
if (!(nd->flags & LOOKUP_RCU)) {
dput(nd->path.dentry);
if (nd->path.mnt != path->mnt)
mntput(nd->path.mnt);
}
nd->path.mnt = path->mnt;
nd->path.dentry = path->dentry;
}
static int nd_jump_root(struct nameidata *nd) static int nd_jump_root(struct nameidata *nd)
{ {
if (unlikely(nd->flags & LOOKUP_BENEATH)) if (unlikely(nd->flags & LOOKUP_BENEATH))
...@@ -969,28 +938,21 @@ int sysctl_protected_regular __read_mostly; ...@@ -969,28 +938,21 @@ int sysctl_protected_regular __read_mostly;
* *
* Returns 0 if following the symlink is allowed, -ve on error. * Returns 0 if following the symlink is allowed, -ve on error.
*/ */
static inline int may_follow_link(struct nameidata *nd) static inline int may_follow_link(struct nameidata *nd, const struct inode *inode)
{ {
const struct inode *inode;
const struct inode *parent;
kuid_t puid;
if (!sysctl_protected_symlinks) if (!sysctl_protected_symlinks)
return 0; return 0;
/* Allowed if owner and follower match. */ /* Allowed if owner and follower match. */
inode = nd->link_inode;
if (uid_eq(current_cred()->fsuid, inode->i_uid)) if (uid_eq(current_cred()->fsuid, inode->i_uid))
return 0; return 0;
/* Allowed if parent directory not sticky and world-writable. */ /* Allowed if parent directory not sticky and world-writable. */
parent = nd->inode; if ((nd->dir_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
if ((parent->i_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
return 0; return 0;
/* Allowed if parent directory and link owner match. */ /* Allowed if parent directory and link owner match. */
puid = parent->i_uid; if (uid_valid(nd->dir_uid) && uid_eq(nd->dir_uid, inode->i_uid))
if (uid_valid(puid) && uid_eq(puid, inode->i_uid))
return 0; return 0;
if (nd->flags & LOOKUP_RCU) if (nd->flags & LOOKUP_RCU)
...@@ -1113,63 +1075,6 @@ static int may_create_in_sticky(umode_t dir_mode, kuid_t dir_uid, ...@@ -1113,63 +1075,6 @@ static int may_create_in_sticky(umode_t dir_mode, kuid_t dir_uid,
return 0; return 0;
} }
/*
 * get_link - fetch the body of the symlink on top of the nd->stack
 * @nd: walk state; the link is nd->stack[nd->depth - 1], its inode is
 *      nd->link_inode
 *
 * Returns the link body to walk next, NULL when there is nothing left to
 * walk (empty body, or a body consisting only of '/'), or an ERR_PTR():
 * -ELOOP when LOOKUP_NO_SYMLINKS is set, -ECHILD when leaving RCU mode
 * fails, or whatever the security hook, ->get_link() or nd_jump_root()
 * report.
 */
static __always_inline
const char *get_link(struct nameidata *nd)
{
struct saved *last = nd->stack + nd->depth - 1;
struct dentry *dentry = last->link.dentry;
struct inode *inode = nd->link_inode;
int error;
const char *res;
/* caller asked for no symlink traversal at all */
if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS))
return ERR_PTR(-ELOOP);
/*
 * atime update: done directly in non-RCU mode; in RCU mode we only
 * leave RCU mode (unlazy_walk) when an update is actually needed.
 */
if (!(nd->flags & LOOKUP_RCU)) {
touch_atime(&last->link);
cond_resched();
} else if (atime_needs_update(&last->link, inode)) {
if (unlikely(unlazy_walk(nd)))
return ERR_PTR(-ECHILD);
touch_atime(&last->link);
}
/* third argument tells the hook whether we are still in RCU mode */
error = security_inode_follow_link(dentry, inode,
nd->flags & LOOKUP_RCU);
if (unlikely(error))
return ERR_PTR(error);
nd->last_type = LAST_BIND;
/* use the body cached in ->i_link if there is one, else ask the fs */
res = READ_ONCE(inode->i_link);
if (!res) {
const char * (*get)(struct dentry *, struct inode *,
struct delayed_call *);
get = inode->i_op->get_link;
if (nd->flags & LOOKUP_RCU) {
/* RCU attempt passes a NULL dentry; -ECHILD means "retry outside RCU" */
res = get(NULL, inode, &last->done);
if (res == ERR_PTR(-ECHILD)) {
if (unlikely(unlazy_walk(nd)))
return ERR_PTR(-ECHILD);
res = get(dentry, inode, &last->done);
}
} else {
res = get(dentry, inode, &last->done);
}
/* errors and NULL are passed straight back to the caller */
if (IS_ERR_OR_NULL(res))
return res;
}
/* absolute link body: jump to the root and skip all leading slashes */
if (*res == '/') {
error = nd_jump_root(nd);
if (unlikely(error))
return ERR_PTR(error);
while (unlikely(*++res == '/'))
;
}
/* nothing left after the slashes (or empty body): report NULL */
if (!*res)
res = NULL;
return res;
}
/* /*
* follow_up - Find the mountpoint of path's vfsmount * follow_up - Find the mountpoint of path's vfsmount
* *
...@@ -1203,19 +1108,59 @@ int follow_up(struct path *path) ...@@ -1203,19 +1108,59 @@ int follow_up(struct path *path)
} }
EXPORT_SYMBOL(follow_up); EXPORT_SYMBOL(follow_up);
/*
 * Walk rootwards from mount @m, one parent at a time, looking for the first
 * mountpoint dentry that is not itself the root of the mount it sits on.
 *
 * On success stores that (parent mount, mountpoint dentry) pair in @path,
 * stores a freshly sampled ->d_seq of the mountpoint in *@seqp and returns
 * true.  Returns false when @m has no parent left or when the walk reaches
 * @root.  No references are taken here.
 */
static bool choose_mountpoint_rcu(struct mount *m, const struct path *root,
				  struct path *path, unsigned *seqp)
{
	while (mnt_has_parent(m)) {
		struct dentry *covered = m->mnt_mountpoint;

		m = m->mnt_parent;

		/* reached the caller's root: nothing above it for us */
		if (unlikely(root->dentry == covered &&
			     root->mnt == &m->mnt))
			return false;

		/* mounted on the parent's own root: keep climbing */
		if (covered == m->mnt.mnt_root)
			continue;

		path->mnt = &m->mnt;
		path->dentry = covered;
		*seqp = read_seqcount_begin(&covered->d_seq);
		return true;
	}
	return false;
}
/*
 * choose_mountpoint - variant of choose_mountpoint_rcu() that tries to
 * legitimize the result
 * @m:    mount to start the rootwards walk from
 * @root: path at which the walk must stop
 * @path: filled with the chosen (mount, mountpoint dentry) pair
 *
 * Runs choose_mountpoint_rcu() under rcu_read_lock() and retries until
 * either a path is found and __legitimize_path() succeeds on it, or nothing
 * is found and mount_lock did not change during the search.
 * Returns true if a path was stored in @path, false otherwise.
 */
static bool choose_mountpoint(struct mount *m, const struct path *root,
struct path *path)
{
bool found;
rcu_read_lock();
while (1) {
/* sample mount_lock so we can tell whether the search raced with a change */
unsigned seq, mseq = read_seqbegin(&mount_lock);
found = choose_mountpoint_rcu(m, root, path, &seq);
if (unlikely(!found)) {
/* "not found" is only trusted if mount_lock stayed unchanged */
if (!read_seqretry(&mount_lock, mseq))
break;
} else {
/* seq is the mountpoint's ->d_seq, mseq the mount_lock sample */
if (likely(__legitimize_path(path, seq, mseq)))
break;
/* failed attempt: path_put() outside of the RCU section, then retry */
rcu_read_unlock();
path_put(path);
rcu_read_lock();
}
}
rcu_read_unlock();
return found;
}
/* /*
* Perform an automount * Perform an automount
* - return -EISDIR to tell follow_managed() to stop and return the path we * - return -EISDIR to tell follow_managed() to stop and return the path we
* were called with. * were called with.
*/ */
static int follow_automount(struct path *path, struct nameidata *nd, static int follow_automount(struct path *path, int *count, unsigned lookup_flags)
bool *need_mntput)
{ {
struct vfsmount *mnt; struct dentry *dentry = path->dentry;
int err;
if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
return -EREMOTE;
/* We don't want to mount if someone's just doing a stat - /* We don't want to mount if someone's just doing a stat -
* unless they're stat'ing a directory and appended a '/' to * unless they're stat'ing a directory and appended a '/' to
...@@ -1228,138 +1173,91 @@ static int follow_automount(struct path *path, struct nameidata *nd, ...@@ -1228,138 +1173,91 @@ static int follow_automount(struct path *path, struct nameidata *nd,
* as being automount points. These will need the attentions * as being automount points. These will need the attentions
* of the daemon to instantiate them before they can be used. * of the daemon to instantiate them before they can be used.
*/ */
if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | if (!(lookup_flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
path->dentry->d_inode) dentry->d_inode)
return -EISDIR; return -EISDIR;
nd->total_link_count++; if (count && (*count)++ >= MAXSYMLINKS)
if (nd->total_link_count >= 40)
return -ELOOP; return -ELOOP;
mnt = path->dentry->d_op->d_automount(path); return finish_automount(dentry->d_op->d_automount(path), path);
if (IS_ERR(mnt)) {
/*
* The filesystem is allowed to return -EISDIR here to indicate
* it doesn't want to automount. For instance, autofs would do
* this so that its userspace daemon can mount on this dentry.
*
* However, we can only permit this if it's a terminal point in
* the path being looked up; if it wasn't then the remainder of
* the path is inaccessible and we should say so.
*/
if (PTR_ERR(mnt) == -EISDIR && (nd->flags & LOOKUP_PARENT))
return -EREMOTE;
return PTR_ERR(mnt);
}
if (!mnt) /* mount collision */
return 0;
if (!*need_mntput) {
/* lock_mount() may release path->mnt on error */
mntget(path->mnt);
*need_mntput = true;
}
err = finish_automount(mnt, path);
switch (err) {
case -EBUSY:
/* Someone else made a mount here whilst we were busy */
return 0;
case 0:
path_put(path);
path->mnt = mnt;
path->dentry = dget(mnt->mnt_root);
return 0;
default:
return err;
}
} }
/* /*
* Handle a dentry that is managed in some way. * mount traversal - out-of-line part. One note on ->d_flags accesses -
* - Flagged for transit management (autofs) * dentries are pinned but not locked here, so negative dentry can go
* - Flagged as mountpoint * positive right under us. Use of smp_load_acquire() provides a barrier
* - Flagged as automount point * sufficient for ->d_inode and ->d_flags consistency.
*
* This may only be called in refwalk mode.
* On success path->dentry is known positive.
*
* Serialization is taken care of in namespace.c
*/ */
static int follow_managed(struct path *path, struct nameidata *nd) static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped,
int *count, unsigned lookup_flags)
{ {
struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */ struct vfsmount *mnt = path->mnt;
unsigned flags;
bool need_mntput = false; bool need_mntput = false;
int ret = 0; int ret = 0;
/* Given that we're not holding a lock here, we retain the value in a while (flags & DCACHE_MANAGED_DENTRY) {
* local variable for each dentry as we look at it so that we don't see
* the components of that value change under us */
while (flags = smp_load_acquire(&path->dentry->d_flags),
unlikely(flags & DCACHE_MANAGED_DENTRY)) {
/* Allow the filesystem to manage the transit without i_mutex /* Allow the filesystem to manage the transit without i_mutex
* being held. */ * being held. */
if (flags & DCACHE_MANAGE_TRANSIT) { if (flags & DCACHE_MANAGE_TRANSIT) {
BUG_ON(!path->dentry->d_op);
BUG_ON(!path->dentry->d_op->d_manage);
ret = path->dentry->d_op->d_manage(path, false); ret = path->dentry->d_op->d_manage(path, false);
flags = smp_load_acquire(&path->dentry->d_flags); flags = smp_load_acquire(&path->dentry->d_flags);
if (ret < 0) if (ret < 0)
break; break;
} }
/* Transit to a mounted filesystem. */ if (flags & DCACHE_MOUNTED) { // something's mounted on it..
if (flags & DCACHE_MOUNTED) {
struct vfsmount *mounted = lookup_mnt(path); struct vfsmount *mounted = lookup_mnt(path);
if (mounted) { if (mounted) { // ... in our namespace
dput(path->dentry); dput(path->dentry);
if (need_mntput) if (need_mntput)
mntput(path->mnt); mntput(path->mnt);
path->mnt = mounted; path->mnt = mounted;
path->dentry = dget(mounted->mnt_root); path->dentry = dget(mounted->mnt_root);
// here we know it's positive
flags = path->dentry->d_flags;
need_mntput = true; need_mntput = true;
continue; continue;
} }
/* Something is mounted on this dentry in another
* namespace and/or whatever was mounted there in this
* namespace got unmounted before lookup_mnt() could
* get it */
} }
/* Handle an automount point */ if (!(flags & DCACHE_NEED_AUTOMOUNT))
if (flags & DCACHE_NEED_AUTOMOUNT) {
ret = follow_automount(path, nd, &need_mntput);
if (ret < 0)
break; break;
continue;
}
/* We didn't change the current path point */ // uncovered automount point
ret = follow_automount(path, count, lookup_flags);
flags = smp_load_acquire(&path->dentry->d_flags);
if (ret < 0)
break; break;
} }
if (need_mntput) { if (ret == -EISDIR)
if (path->mnt == mnt) ret = 0;
// possible if you race with several mount --move
if (need_mntput && path->mnt == mnt)
mntput(path->mnt); mntput(path->mnt);
if (unlikely(nd->flags & LOOKUP_NO_XDEV)) if (!ret && unlikely(d_flags_negative(flags)))
ret = -EXDEV;
else
nd->flags |= LOOKUP_JUMPED;
}
if (ret == -EISDIR || !ret)
ret = 1;
if (ret > 0 && unlikely(d_flags_negative(flags)))
ret = -ENOENT; ret = -ENOENT;
if (unlikely(ret < 0)) *jumped = need_mntput;
path_put_conditional(path, nd);
return ret; return ret;
} }
/*
 * Inline front end for __traverse_mounts().
 *
 * Samples path->dentry->d_flags with acquire semantics.  If the dentry is
 * not managed (no DCACHE_MANAGED_DENTRY bits), nothing is traversed:
 * *@jumped is set to false and the result is -ENOENT for a negative dentry,
 * 0 otherwise.  Managed dentries are handed to the out-of-line
 * __traverse_mounts() together with the sampled flags.
 */
static inline int traverse_mounts(struct path *path, bool *jumped,
				  int *count, unsigned lookup_flags)
{
	unsigned d_flags = smp_load_acquire(&path->dentry->d_flags);

	/* slow path: something manages this dentry */
	if (unlikely(d_flags & DCACHE_MANAGED_DENTRY))
		return __traverse_mounts(path, d_flags, jumped, count,
					 lookup_flags);

	*jumped = false;
	return unlikely(d_flags_negative(d_flags)) ? -ENOENT : 0;
}
int follow_down_one(struct path *path) int follow_down_one(struct path *path)
{ {
struct vfsmount *mounted; struct vfsmount *mounted;
...@@ -1376,11 +1274,22 @@ int follow_down_one(struct path *path) ...@@ -1376,11 +1274,22 @@ int follow_down_one(struct path *path)
} }
EXPORT_SYMBOL(follow_down_one); EXPORT_SYMBOL(follow_down_one);
static inline int managed_dentry_rcu(const struct path *path) /*
* Follow down to the covering mount currently visible to userspace. At each
* point, the filesystem owning that dentry may be queried as to whether the
* caller is permitted to proceed or not.
*/
int follow_down(struct path *path)
{ {
return (path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) ? struct vfsmount *mnt = path->mnt;
path->dentry->d_op->d_manage(path, true) : 0; bool jumped;
int ret = traverse_mounts(path, &jumped, NULL, 0);
if (path->mnt != mnt)
mntput(mnt);
return ret;
} }
EXPORT_SYMBOL(follow_down);
/* /*
* Try to skip to top of mountpoint pile in rcuwalk mode. Fail if * Try to skip to top of mountpoint pile in rcuwalk mode. Fail if
...@@ -1389,204 +1298,88 @@ static inline int managed_dentry_rcu(const struct path *path) ...@@ -1389,204 +1298,88 @@ static inline int managed_dentry_rcu(const struct path *path)
static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
struct inode **inode, unsigned *seqp) struct inode **inode, unsigned *seqp)
{ {
struct dentry *dentry = path->dentry;
unsigned int flags = dentry->d_flags;
if (likely(!(flags & DCACHE_MANAGED_DENTRY)))
return true;
if (unlikely(nd->flags & LOOKUP_NO_XDEV))
return false;
for (;;) { for (;;) {
struct mount *mounted;
/* /*
* Don't forget we might have a non-mountpoint managed dentry * Don't forget we might have a non-mountpoint managed dentry
* that wants to block transit. * that wants to block transit.
*/ */
switch (managed_dentry_rcu(path)) { if (unlikely(flags & DCACHE_MANAGE_TRANSIT)) {
case -ECHILD: int res = dentry->d_op->d_manage(path, true);
default: if (res)
return false; return res == -EISDIR;
case -EISDIR: flags = dentry->d_flags;
return true;
case 0:
break;
} }
if (!d_mountpoint(path->dentry)) if (flags & DCACHE_MOUNTED) {
return !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT); struct mount *mounted = __lookup_mnt(path->mnt, dentry);
if (mounted) {
mounted = __lookup_mnt(path->mnt, path->dentry);
if (!mounted)
break;
if (unlikely(nd->flags & LOOKUP_NO_XDEV))
return false;
path->mnt = &mounted->mnt; path->mnt = &mounted->mnt;
path->dentry = mounted->mnt.mnt_root; dentry = path->dentry = mounted->mnt.mnt_root;
nd->flags |= LOOKUP_JUMPED; nd->flags |= LOOKUP_JUMPED;
*seqp = read_seqcount_begin(&path->dentry->d_seq); *seqp = read_seqcount_begin(&dentry->d_seq);
*inode = dentry->d_inode;
/* /*
* Update the inode too. We don't need to re-check the * We don't need to re-check ->d_seq after this
* dentry sequence number here after this d_inode read, * ->d_inode read - there will be an RCU delay
* because a mount-point is always pinned. * between mount hash removal and ->mnt_root
* becoming unpinned.
*/ */
*inode = path->dentry->d_inode; flags = dentry->d_flags;
} continue;
return !read_seqretry(&mount_lock, nd->m_seq) &&
!(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT);
}
static int follow_dotdot_rcu(struct nameidata *nd)
{
struct inode *inode = nd->inode;
while (1) {
if (path_equal(&nd->path, &nd->root)) {
if (unlikely(nd->flags & LOOKUP_BENEATH))
return -ECHILD;
break;
}
if (nd->path.dentry != nd->path.mnt->mnt_root) {
struct dentry *old = nd->path.dentry;
struct dentry *parent = old->d_parent;
unsigned seq;
inode = parent->d_inode;
seq = read_seqcount_begin(&parent->d_seq);
if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
return -ECHILD;
nd->path.dentry = parent;
nd->seq = seq;
if (unlikely(!path_connected(&nd->path)))
return -ECHILD;
break;
} else {
struct mount *mnt = real_mount(nd->path.mnt);
struct mount *mparent = mnt->mnt_parent;
struct dentry *mountpoint = mnt->mnt_mountpoint;
struct inode *inode2 = mountpoint->d_inode;
unsigned seq = read_seqcount_begin(&mountpoint->d_seq);
if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
return -ECHILD;
if (&mparent->mnt == nd->path.mnt)
break;
if (unlikely(nd->flags & LOOKUP_NO_XDEV))
return -ECHILD;
/* we know that mountpoint was pinned */
nd->path.dentry = mountpoint;
nd->path.mnt = &mparent->mnt;
inode = inode2;
nd->seq = seq;
} }
if (read_seqretry(&mount_lock, nd->m_seq))
return false;
} }
while (unlikely(d_mountpoint(nd->path.dentry))) { return !(flags & DCACHE_NEED_AUTOMOUNT);
struct mount *mounted;
mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
return -ECHILD;
if (!mounted)
break;
if (unlikely(nd->flags & LOOKUP_NO_XDEV))
return -ECHILD;
nd->path.mnt = &mounted->mnt;
nd->path.dentry = mounted->mnt.mnt_root;
inode = nd->path.dentry->d_inode;
nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
} }
nd->inode = inode;
return 0;
} }
/* static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
* Follow down to the covering mount currently visible to userspace. At each struct path *path, struct inode **inode,
* point, the filesystem owning that dentry may be queried as to whether the unsigned int *seqp)
* caller is permitted to proceed or not.
*/
int follow_down(struct path *path)
{ {
unsigned managed; bool jumped;
int ret; int ret;
while (managed = READ_ONCE(path->dentry->d_flags), path->mnt = nd->path.mnt;
unlikely(managed & DCACHE_MANAGED_DENTRY)) { path->dentry = dentry;
/* Allow the filesystem to manage the transit without i_mutex if (nd->flags & LOOKUP_RCU) {
* being held. unsigned int seq = *seqp;
* if (unlikely(!*inode))
* We indicate to the filesystem if someone is trying to mount return -ENOENT;
* something here. This gives autofs the chance to deny anyone if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
* other than its daemon the right to mount on its return 0;
* superstructure. if (unlazy_child(nd, dentry, seq))
* return -ECHILD;
* The filesystem may sleep at this point. // *path might've been clobbered by __follow_mount_rcu()
*/ path->mnt = nd->path.mnt;
if (managed & DCACHE_MANAGE_TRANSIT) { path->dentry = dentry;
BUG_ON(!path->dentry->d_op);
BUG_ON(!path->dentry->d_op->d_manage);
ret = path->dentry->d_op->d_manage(path, false);
if (ret < 0)
return ret == -EISDIR ? 0 : ret;
}
/* Transit to a mounted filesystem. */
if (managed & DCACHE_MOUNTED) {
struct vfsmount *mounted = lookup_mnt(path);
if (!mounted)
break;
dput(path->dentry);
mntput(path->mnt);
path->mnt = mounted;
path->dentry = dget(mounted->mnt_root);
continue;
} }
ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
/* Don't handle automount points here */ if (jumped) {
break; if (unlikely(nd->flags & LOOKUP_NO_XDEV))
ret = -EXDEV;
else
nd->flags |= LOOKUP_JUMPED;
} }
return 0; if (unlikely(ret)) {
}
EXPORT_SYMBOL(follow_down);
/*
* Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
*/
static void follow_mount(struct path *path)
{
while (d_mountpoint(path->dentry)) {
struct vfsmount *mounted = lookup_mnt(path);
if (!mounted)
break;
dput(path->dentry); dput(path->dentry);
if (path->mnt != nd->path.mnt)
mntput(path->mnt); mntput(path->mnt);
path->mnt = mounted; } else {
path->dentry = dget(mounted->mnt_root); *inode = d_backing_inode(path->dentry);
} *seqp = 0; /* out of RCU mode, so the value doesn't matter */
}
static int path_parent_directory(struct path *path)
{
struct dentry *old = path->dentry;
/* rare case of legitimate dget_parent()... */
path->dentry = dget_parent(path->dentry);
dput(old);
if (unlikely(!path_connected(path)))
return -ENOENT;
return 0;
}
static int follow_dotdot(struct nameidata *nd)
{
while (1) {
if (path_equal(&nd->path, &nd->root)) {
if (unlikely(nd->flags & LOOKUP_BENEATH))
return -EXDEV;
break;
} }
if (nd->path.dentry != nd->path.mnt->mnt_root) {
int ret = path_parent_directory(&nd->path);
if (ret)
return ret; return ret;
break;
}
if (!follow_up(&nd->path))
break;
if (unlikely(nd->flags & LOOKUP_NO_XDEV))
return -EXDEV;
}
follow_mount(&nd->path);
nd->inode = nd->path.dentry->d_inode;
return 0;
} }
/* /*
...@@ -1643,14 +1436,12 @@ static struct dentry *__lookup_hash(const struct qstr *name, ...@@ -1643,14 +1436,12 @@ static struct dentry *__lookup_hash(const struct qstr *name,
return dentry; return dentry;
} }
static int lookup_fast(struct nameidata *nd, static struct dentry *lookup_fast(struct nameidata *nd,
struct path *path, struct inode **inode, struct inode **inode,
unsigned *seqp) unsigned *seqp)
{ {
struct vfsmount *mnt = nd->path.mnt;
struct dentry *dentry, *parent = nd->path.dentry; struct dentry *dentry, *parent = nd->path.dentry;
int status = 1; int status = 1;
int err;
/* /*
* Rename seqlock is not required here because in the off chance * Rename seqlock is not required here because in the off chance
...@@ -1659,12 +1450,11 @@ static int lookup_fast(struct nameidata *nd, ...@@ -1659,12 +1450,11 @@ static int lookup_fast(struct nameidata *nd,
*/ */
if (nd->flags & LOOKUP_RCU) { if (nd->flags & LOOKUP_RCU) {
unsigned seq; unsigned seq;
bool negative;
dentry = __d_lookup_rcu(parent, &nd->last, &seq); dentry = __d_lookup_rcu(parent, &nd->last, &seq);
if (unlikely(!dentry)) { if (unlikely(!dentry)) {
if (unlazy_walk(nd)) if (unlazy_walk(nd))
return -ECHILD; return ERR_PTR(-ECHILD);
return 0; return NULL;
} }
/* /*
...@@ -1672,9 +1462,8 @@ static int lookup_fast(struct nameidata *nd, ...@@ -1672,9 +1462,8 @@ static int lookup_fast(struct nameidata *nd,
* the dentry name information from lookup. * the dentry name information from lookup.
*/ */
*inode = d_backing_inode(dentry); *inode = d_backing_inode(dentry);
negative = d_is_negative(dentry);
if (unlikely(read_seqcount_retry(&dentry->d_seq, seq))) if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
return -ECHILD; return ERR_PTR(-ECHILD);
/* /*
* This sequence count validates that the parent had no * This sequence count validates that the parent had no
...@@ -1684,46 +1473,30 @@ static int lookup_fast(struct nameidata *nd, ...@@ -1684,46 +1473,30 @@ static int lookup_fast(struct nameidata *nd,
* enough, we can use __read_seqcount_retry here. * enough, we can use __read_seqcount_retry here.
*/ */
if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq))) if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq)))
return -ECHILD; return ERR_PTR(-ECHILD);
*seqp = seq; *seqp = seq;
status = d_revalidate(dentry, nd->flags); status = d_revalidate(dentry, nd->flags);
if (likely(status > 0)) { if (likely(status > 0))
/* return dentry;
* Note: do negative dentry check after revalidation in
* case that drops it.
*/
if (unlikely(negative))
return -ENOENT;
path->mnt = mnt;
path->dentry = dentry;
if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
return 1;
}
if (unlazy_child(nd, dentry, seq)) if (unlazy_child(nd, dentry, seq))
return -ECHILD; return ERR_PTR(-ECHILD);
if (unlikely(status == -ECHILD)) if (unlikely(status == -ECHILD))
/* we'd been told to redo it in non-rcu mode */ /* we'd been told to redo it in non-rcu mode */
status = d_revalidate(dentry, nd->flags); status = d_revalidate(dentry, nd->flags);
} else { } else {
dentry = __d_lookup(parent, &nd->last); dentry = __d_lookup(parent, &nd->last);
if (unlikely(!dentry)) if (unlikely(!dentry))
return 0; return NULL;
status = d_revalidate(dentry, nd->flags); status = d_revalidate(dentry, nd->flags);
} }
if (unlikely(status <= 0)) { if (unlikely(status <= 0)) {
if (!status) if (!status)
d_invalidate(dentry); d_invalidate(dentry);
dput(dentry); dput(dentry);
return status; return ERR_PTR(status);
} }
return dentry;
path->mnt = mnt;
path->dentry = dentry;
err = follow_managed(path, nd);
if (likely(err > 0))
*inode = d_backing_inode(path->dentry);
return err;
} }
/* Fast lookup failed, do it the slow way */ /* Fast lookup failed, do it the slow way */
...@@ -1788,81 +1561,107 @@ static inline int may_lookup(struct nameidata *nd) ...@@ -1788,81 +1561,107 @@ static inline int may_lookup(struct nameidata *nd)
return inode_permission(nd->inode, MAY_EXEC); return inode_permission(nd->inode, MAY_EXEC);
} }
static inline int handle_dots(struct nameidata *nd, int type) static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
{ {
if (type == LAST_DOTDOT) { if (unlikely(nd->total_link_count++ >= MAXSYMLINKS))
int error = 0; return -ELOOP;
if (!nd->root.mnt) { if (likely(nd->depth != EMBEDDED_LEVELS))
error = set_root(nd); return 0;
if (error) if (likely(nd->stack != nd->internal))
return error; return 0;
} if (likely(nd_alloc_stack(nd)))
if (nd->flags & LOOKUP_RCU) return 0;
error = follow_dotdot_rcu(nd);
else
error = follow_dotdot(nd);
if (error)
return error;
if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) { if (nd->flags & LOOKUP_RCU) {
/* // we need to grab link before we do unlazy. And we can't skip
* If there was a racing rename or mount along our // unlazy even if we fail to grab the link - cleanup needs it
* path, then we can't be sure that ".." hasn't jumped bool grabbed_link = legitimize_path(nd, link, seq);
* above nd->root (and so userspace should retry or use
* some fallback). if (unlazy_walk(nd) != 0 || !grabbed_link)
*/ return -ECHILD;
smp_rmb();
if (unlikely(__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq))) if (nd_alloc_stack(nd))
return -EAGAIN;
if (unlikely(__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq)))
return -EAGAIN;
}
}
return 0; return 0;
}
return -ENOMEM;
} }
static int pick_link(struct nameidata *nd, struct path *link, enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};
struct inode *inode, unsigned seq)
static const char *pick_link(struct nameidata *nd, struct path *link,
struct inode *inode, unsigned seq, int flags)
{ {
int error;
struct saved *last; struct saved *last;
if (unlikely(nd->total_link_count++ >= MAXSYMLINKS)) { const char *res;
path_to_nameidata(link, nd); int error = reserve_stack(nd, link, seq);
return -ELOOP;
}
if (!(nd->flags & LOOKUP_RCU)) {
if (link->mnt == nd->path.mnt)
mntget(link->mnt);
}
error = nd_alloc_stack(nd);
if (unlikely(error)) { if (unlikely(error)) {
if (error == -ECHILD) { if (!(nd->flags & LOOKUP_RCU))
if (unlikely(!legitimize_path(nd, link, seq))) {
drop_links(nd);
nd->depth = 0;
nd->flags &= ~LOOKUP_RCU;
nd->path.mnt = NULL;
nd->path.dentry = NULL;
rcu_read_unlock();
} else if (likely(unlazy_walk(nd)) == 0)
error = nd_alloc_stack(nd);
}
if (error) {
path_put(link); path_put(link);
return error; return ERR_PTR(error);
}
} }
last = nd->stack + nd->depth++; last = nd->stack + nd->depth++;
last->link = *link; last->link = *link;
clear_delayed_call(&last->done); clear_delayed_call(&last->done);
nd->link_inode = inode;
last->seq = seq; last->seq = seq;
return 1;
}
enum {WALK_FOLLOW = 1, WALK_MORE = 2}; if (flags & WALK_TRAILING) {
error = may_follow_link(nd, inode);
if (unlikely(error))
return ERR_PTR(error);
}
if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS))
return ERR_PTR(-ELOOP);
if (!(nd->flags & LOOKUP_RCU)) {
touch_atime(&last->link);
cond_resched();
} else if (atime_needs_update(&last->link, inode)) {
if (unlikely(unlazy_walk(nd)))
return ERR_PTR(-ECHILD);
touch_atime(&last->link);
}
error = security_inode_follow_link(link->dentry, inode,
nd->flags & LOOKUP_RCU);
if (unlikely(error))
return ERR_PTR(error);
res = READ_ONCE(inode->i_link);
if (!res) {
const char * (*get)(struct dentry *, struct inode *,
struct delayed_call *);
get = inode->i_op->get_link;
if (nd->flags & LOOKUP_RCU) {
res = get(NULL, inode, &last->done);
if (res == ERR_PTR(-ECHILD)) {
if (unlikely(unlazy_walk(nd)))
return ERR_PTR(-ECHILD);
res = get(link->dentry, inode, &last->done);
}
} else {
res = get(link->dentry, inode, &last->done);
}
if (!res)
goto all_done;
if (IS_ERR(res))
return res;
}
if (*res == '/') {
error = nd_jump_root(nd);
if (unlikely(error))
return ERR_PTR(error);
while (unlikely(*++res == '/'))
;
}
if (*res)
return res;
all_done: // pure jump
put_link(nd);
return NULL;
}
/* /*
* Do we need to follow links? We _really_ want to be able * Do we need to follow links? We _really_ want to be able
...@@ -1870,63 +1669,187 @@ enum {WALK_FOLLOW = 1, WALK_MORE = 2}; ...@@ -1870,63 +1669,187 @@ enum {WALK_FOLLOW = 1, WALK_MORE = 2};
* so we keep a cache of "no, this doesn't need follow_link" * so we keep a cache of "no, this doesn't need follow_link"
* for the common case. * for the common case.
*/ */
static inline int step_into(struct nameidata *nd, struct path *path, static const char *step_into(struct nameidata *nd, int flags,
int flags, struct inode *inode, unsigned seq) struct dentry *dentry, struct inode *inode, unsigned seq)
{ {
if (!(flags & WALK_MORE) && nd->depth) struct path path;
put_link(nd); int err = handle_mounts(nd, dentry, &path, &inode, &seq);
if (likely(!d_is_symlink(path->dentry)) ||
!(flags & WALK_FOLLOW || nd->flags & LOOKUP_FOLLOW)) { if (err < 0)
return ERR_PTR(err);
if (likely(!d_is_symlink(path.dentry)) ||
((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) ||
(flags & WALK_NOFOLLOW)) {
/* not a symlink or should not follow */ /* not a symlink or should not follow */
path_to_nameidata(path, nd); if (!(nd->flags & LOOKUP_RCU)) {
dput(nd->path.dentry);
if (nd->path.mnt != path.mnt)
mntput(nd->path.mnt);
}
nd->path = path;
nd->inode = inode; nd->inode = inode;
nd->seq = seq; nd->seq = seq;
return 0; return NULL;
} }
/* make sure that d_is_symlink above matches inode */
if (nd->flags & LOOKUP_RCU) { if (nd->flags & LOOKUP_RCU) {
if (read_seqcount_retry(&path->dentry->d_seq, seq)) /* make sure that d_is_symlink above matches inode */
return -ECHILD; if (read_seqcount_retry(&path.dentry->d_seq, seq))
return ERR_PTR(-ECHILD);
} else {
if (path.mnt == nd->path.mnt)
mntget(path.mnt);
} }
return pick_link(nd, path, inode, seq); return pick_link(nd, &path, inode, seq, flags);
} }
static int walk_component(struct nameidata *nd, int flags) static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
struct inode **inodep,
unsigned *seqp)
{ {
struct dentry *parent, *old;
if (path_equal(&nd->path, &nd->root))
goto in_root;
if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) {
struct path path; struct path path;
unsigned seq;
if (!choose_mountpoint_rcu(real_mount(nd->path.mnt),
&nd->root, &path, &seq))
goto in_root;
if (unlikely(nd->flags & LOOKUP_NO_XDEV))
return ERR_PTR(-ECHILD);
nd->path = path;
nd->inode = path.dentry->d_inode;
nd->seq = seq;
if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
return ERR_PTR(-ECHILD);
/* we know that mountpoint was pinned */
}
old = nd->path.dentry;
parent = old->d_parent;
*inodep = parent->d_inode;
*seqp = read_seqcount_begin(&parent->d_seq);
if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
return ERR_PTR(-ECHILD);
if (unlikely(!path_connected(nd->path.mnt, parent)))
return ERR_PTR(-ECHILD);
return parent;
in_root:
if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
return ERR_PTR(-ECHILD);
if (unlikely(nd->flags & LOOKUP_BENEATH))
return ERR_PTR(-ECHILD);
return NULL;
}
static struct dentry *follow_dotdot(struct nameidata *nd,
struct inode **inodep,
unsigned *seqp)
{
struct dentry *parent;
if (path_equal(&nd->path, &nd->root))
goto in_root;
if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) {
struct path path;
if (!choose_mountpoint(real_mount(nd->path.mnt),
&nd->root, &path))
goto in_root;
path_put(&nd->path);
nd->path = path;
nd->inode = path.dentry->d_inode;
if (unlikely(nd->flags & LOOKUP_NO_XDEV))
return ERR_PTR(-EXDEV);
}
/* rare case of legitimate dget_parent()... */
parent = dget_parent(nd->path.dentry);
if (unlikely(!path_connected(nd->path.mnt, parent))) {
dput(parent);
return ERR_PTR(-ENOENT);
}
*seqp = 0;
*inodep = parent->d_inode;
return parent;
in_root:
if (unlikely(nd->flags & LOOKUP_BENEATH))
return ERR_PTR(-EXDEV);
dget(nd->path.dentry);
return NULL;
}
static const char *handle_dots(struct nameidata *nd, int type)
{
if (type == LAST_DOTDOT) {
const char *error = NULL;
struct dentry *parent;
struct inode *inode;
unsigned seq;
if (!nd->root.mnt) {
error = ERR_PTR(set_root(nd));
if (error)
return error;
}
if (nd->flags & LOOKUP_RCU)
parent = follow_dotdot_rcu(nd, &inode, &seq);
else
parent = follow_dotdot(nd, &inode, &seq);
if (IS_ERR(parent))
return ERR_CAST(parent);
if (unlikely(!parent))
error = step_into(nd, WALK_NOFOLLOW,
nd->path.dentry, nd->inode, nd->seq);
else
error = step_into(nd, WALK_NOFOLLOW,
parent, inode, seq);
if (unlikely(error))
return error;
if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
/*
* If there was a racing rename or mount along our
* path, then we can't be sure that ".." hasn't jumped
* above nd->root (and so userspace should retry or use
* some fallback).
*/
smp_rmb();
if (unlikely(__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq)))
return ERR_PTR(-EAGAIN);
if (unlikely(__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq)))
return ERR_PTR(-EAGAIN);
}
}
return NULL;
}
static const char *walk_component(struct nameidata *nd, int flags)
{
struct dentry *dentry;
struct inode *inode; struct inode *inode;
unsigned seq; unsigned seq;
int err;
/* /*
* "." and ".." are special - ".." especially so because it has * "." and ".." are special - ".." especially so because it has
* to be able to know about the current root directory and * to be able to know about the current root directory and
* parent relationships. * parent relationships.
*/ */
if (unlikely(nd->last_type != LAST_NORM)) { if (unlikely(nd->last_type != LAST_NORM)) {
err = handle_dots(nd, nd->last_type);
if (!(flags & WALK_MORE) && nd->depth) if (!(flags & WALK_MORE) && nd->depth)
put_link(nd); put_link(nd);
return err; return handle_dots(nd, nd->last_type);
} }
err = lookup_fast(nd, &path, &inode, &seq); dentry = lookup_fast(nd, &inode, &seq);
if (unlikely(err <= 0)) { if (IS_ERR(dentry))
if (err < 0) return ERR_CAST(dentry);
return err; if (unlikely(!dentry)) {
path.dentry = lookup_slow(&nd->last, nd->path.dentry, dentry = lookup_slow(&nd->last, nd->path.dentry, nd->flags);
nd->flags); if (IS_ERR(dentry))
if (IS_ERR(path.dentry)) return ERR_CAST(dentry);
return PTR_ERR(path.dentry);
path.mnt = nd->path.mnt;
err = follow_managed(&path, nd);
if (unlikely(err < 0))
return err;
seq = 0; /* we are already out of RCU mode */
inode = d_backing_inode(path.dentry);
} }
if (!(flags & WALK_MORE) && nd->depth)
return step_into(nd, &path, flags, inode, seq); put_link(nd);
return step_into(nd, flags, dentry, inode, seq);
} }
/* /*
...@@ -2167,8 +2090,11 @@ static inline u64 hash_name(const void *salt, const char *name) ...@@ -2167,8 +2090,11 @@ static inline u64 hash_name(const void *salt, const char *name)
*/ */
static int link_path_walk(const char *name, struct nameidata *nd) static int link_path_walk(const char *name, struct nameidata *nd)
{ {
int depth = 0; // depth <= nd->depth
int err; int err;
nd->last_type = LAST_ROOT;
nd->flags |= LOOKUP_PARENT;
if (IS_ERR(name)) if (IS_ERR(name))
return PTR_ERR(name); return PTR_ERR(name);
while (*name=='/') while (*name=='/')
...@@ -2178,6 +2104,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) ...@@ -2178,6 +2104,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
/* At this point we know we have a real path component. */ /* At this point we know we have a real path component. */
for(;;) { for(;;) {
const char *link;
u64 hash_len; u64 hash_len;
int type; int type;
...@@ -2227,37 +2154,28 @@ static int link_path_walk(const char *name, struct nameidata *nd) ...@@ -2227,37 +2154,28 @@ static int link_path_walk(const char *name, struct nameidata *nd)
} while (unlikely(*name == '/')); } while (unlikely(*name == '/'));
if (unlikely(!*name)) { if (unlikely(!*name)) {
OK: OK:
/* pathname body, done */ /* pathname or trailing symlink, done */
if (!nd->depth) if (!depth) {
return 0; nd->dir_uid = nd->inode->i_uid;
name = nd->stack[nd->depth - 1].name; nd->dir_mode = nd->inode->i_mode;
/* trailing symlink, done */ nd->flags &= ~LOOKUP_PARENT;
if (!name)
return 0; return 0;
}
/* last component of nested symlink */ /* last component of nested symlink */
err = walk_component(nd, WALK_FOLLOW); name = nd->stack[--depth].name;
link = walk_component(nd, 0);
} else { } else {
/* not the last component */ /* not the last component */
err = walk_component(nd, WALK_FOLLOW | WALK_MORE); link = walk_component(nd, WALK_MORE);
} }
if (err < 0) if (unlikely(link)) {
return err; if (IS_ERR(link))
return PTR_ERR(link);
if (err) { /* a symlink to follow */
const char *s = get_link(nd); nd->stack[depth++].name = name;
name = link;
if (IS_ERR(s))
return PTR_ERR(s);
err = 0;
if (unlikely(!s)) {
/* jumped */
put_link(nd);
} else {
nd->stack[nd->depth - 1].name = name;
name = s;
continue; continue;
} }
}
if (unlikely(!d_can_lookup(nd->path.dentry))) { if (unlikely(!d_can_lookup(nd->path.dentry))) {
if (nd->flags & LOOKUP_RCU) { if (nd->flags & LOOKUP_RCU) {
if (unlazy_walk(nd)) if (unlazy_walk(nd))
...@@ -2279,8 +2197,7 @@ static const char *path_init(struct nameidata *nd, unsigned flags) ...@@ -2279,8 +2197,7 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
if (flags & LOOKUP_RCU) if (flags & LOOKUP_RCU)
rcu_read_lock(); rcu_read_lock();
nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags | LOOKUP_JUMPED;
nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
nd->depth = 0; nd->depth = 0;
nd->m_seq = __read_seqcount_begin(&mount_lock.seqcount); nd->m_seq = __read_seqcount_begin(&mount_lock.seqcount);
...@@ -2370,54 +2287,20 @@ static const char *path_init(struct nameidata *nd, unsigned flags) ...@@ -2370,54 +2287,20 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
return s; return s;
} }
static const char *trailing_symlink(struct nameidata *nd) static inline const char *lookup_last(struct nameidata *nd)
{
const char *s;
int error = may_follow_link(nd);
if (unlikely(error))
return ERR_PTR(error);
nd->flags |= LOOKUP_PARENT;
nd->stack[0].name = NULL;
s = get_link(nd);
return s ? s : "";
}
static inline int lookup_last(struct nameidata *nd)
{ {
if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len]) if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
nd->flags &= ~LOOKUP_PARENT; return walk_component(nd, WALK_TRAILING);
return walk_component(nd, 0);
} }
static int handle_lookup_down(struct nameidata *nd) static int handle_lookup_down(struct nameidata *nd)
{ {
struct path path = nd->path; if (!(nd->flags & LOOKUP_RCU))
struct inode *inode = nd->inode; dget(nd->path.dentry);
unsigned seq = nd->seq; return PTR_ERR(step_into(nd, WALK_NOFOLLOW,
int err; nd->path.dentry, nd->inode, nd->seq));
if (nd->flags & LOOKUP_RCU) {
/*
* don't bother with unlazy_walk on failure - we are
* at the very beginning of walk, so we lose nothing
* if we simply redo everything in non-RCU mode
*/
if (unlikely(!__follow_mount_rcu(nd, &path, &inode, &seq)))
return -ECHILD;
} else {
dget(path.dentry);
err = follow_managed(&path, nd);
if (unlikely(err < 0))
return err;
inode = d_backing_inode(path.dentry);
seq = 0;
}
path_to_nameidata(&path, nd);
nd->inode = inode;
nd->seq = seq;
return 0;
} }
/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
...@@ -2432,16 +2315,19 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path ...@@ -2432,16 +2315,19 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
s = ERR_PTR(err); s = ERR_PTR(err);
} }
while (!(err = link_path_walk(s, nd)) while (!(err = link_path_walk(s, nd)) &&
&& ((err = lookup_last(nd)) > 0)) { (s = lookup_last(nd)) != NULL)
s = trailing_symlink(nd); ;
}
if (!err) if (!err)
err = complete_walk(nd); err = complete_walk(nd);
if (!err && nd->flags & LOOKUP_DIRECTORY) if (!err && nd->flags & LOOKUP_DIRECTORY)
if (!d_can_lookup(nd->path.dentry)) if (!d_can_lookup(nd->path.dentry))
err = -ENOTDIR; err = -ENOTDIR;
if (!err && unlikely(nd->flags & LOOKUP_MOUNTPOINT)) {
err = handle_lookup_down(nd);
nd->flags &= ~LOOKUP_JUMPED; // no d_weak_revalidate(), please...
}
if (!err) { if (!err) {
*path = nd->path; *path = nd->path;
nd->path.mnt = NULL; nd->path.mnt = NULL;
...@@ -2470,7 +2356,8 @@ int filename_lookup(int dfd, struct filename *name, unsigned flags, ...@@ -2470,7 +2356,8 @@ int filename_lookup(int dfd, struct filename *name, unsigned flags,
retval = path_lookupat(&nd, flags | LOOKUP_REVAL, path); retval = path_lookupat(&nd, flags | LOOKUP_REVAL, path);
if (likely(!retval)) if (likely(!retval))
audit_inode(name, path->dentry, 0); audit_inode(name, path->dentry,
flags & LOOKUP_MOUNTPOINT ? AUDIT_INODE_NOEVAL : 0);
restore_nameidata(); restore_nameidata();
putname(name); putname(name);
return retval; return retval;
...@@ -2718,24 +2605,23 @@ int path_pts(struct path *path) ...@@ -2718,24 +2605,23 @@ int path_pts(struct path *path)
/* Find something mounted on "pts" in the same directory as /* Find something mounted on "pts" in the same directory as
* the input path. * the input path.
*/ */
struct dentry *child, *parent; struct dentry *parent = dget_parent(path->dentry);
struct qstr this; struct dentry *child;
int ret; struct qstr this = QSTR_INIT("pts", 3);
ret = path_parent_directory(path);
if (ret)
return ret;
parent = path->dentry; if (unlikely(!path_connected(path->mnt, parent))) {
this.name = "pts"; dput(parent);
this.len = 3; return -ENOENT;
}
dput(path->dentry);
path->dentry = parent;
child = d_hash_and_lookup(parent, &this); child = d_hash_and_lookup(parent, &this);
if (!child) if (!child)
return -ENOENT; return -ENOENT;
path->dentry = child; path->dentry = child;
dput(parent); dput(parent);
follow_mount(path); follow_down(path);
return 0; return 0;
} }
#endif #endif
...@@ -2748,88 +2634,6 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags, ...@@ -2748,88 +2634,6 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
} }
EXPORT_SYMBOL(user_path_at_empty); EXPORT_SYMBOL(user_path_at_empty);
/**
* path_mountpoint - look up a path to be umounted
* @nd: lookup context
* @flags: lookup flags
* @path: pointer to container for result
*
* Look up the given name, but don't attempt to revalidate the last component.
* Returns 0 and "path" will be valid on success; Returns error otherwise.
*/
static int
path_mountpoint(struct nameidata *nd, unsigned flags, struct path *path)
{
const char *s = path_init(nd, flags);
int err;
while (!(err = link_path_walk(s, nd)) &&
(err = lookup_last(nd)) > 0) {
s = trailing_symlink(nd);
}
if (!err && (nd->flags & LOOKUP_RCU))
err = unlazy_walk(nd);
if (!err)
err = handle_lookup_down(nd);
if (!err) {
*path = nd->path;
nd->path.mnt = NULL;
nd->path.dentry = NULL;
}
terminate_walk(nd);
return err;
}
static int
filename_mountpoint(int dfd, struct filename *name, struct path *path,
unsigned int flags)
{
struct nameidata nd;
int error;
if (IS_ERR(name))
return PTR_ERR(name);
set_nameidata(&nd, dfd, name);
error = path_mountpoint(&nd, flags | LOOKUP_RCU, path);
if (unlikely(error == -ECHILD))
error = path_mountpoint(&nd, flags, path);
if (unlikely(error == -ESTALE))
error = path_mountpoint(&nd, flags | LOOKUP_REVAL, path);
if (likely(!error))
audit_inode(name, path->dentry, AUDIT_INODE_NOEVAL);
restore_nameidata();
putname(name);
return error;
}
/**
* user_path_mountpoint_at - lookup a path from userland in order to umount it
* @dfd: directory file descriptor
* @name: pathname from userland
* @flags: lookup flags
* @path: pointer to container to hold result
*
* A umount is a special case for path walking. We're not actually interested
* in the inode in this situation, and ESTALE errors can be a problem. We
* simply want track down the dentry and vfsmount attached at the mountpoint
* and avoid revalidating the last component.
*
* Returns 0 and populates "path" on success.
*/
int
user_path_mountpoint_at(int dfd, const char __user *name, unsigned int flags,
struct path *path)
{
return filename_mountpoint(dfd, getname(name), path, flags);
}
int
kern_path_mountpoint(int dfd, const char *name, struct path *path,
unsigned int flags)
{
return filename_mountpoint(dfd, getname_kernel(name), path, flags);
}
EXPORT_SYMBOL(kern_path_mountpoint);
int __check_sticky(struct inode *dir, struct inode *inode) int __check_sticky(struct inode *dir, struct inode *inode)
{ {
kuid_t fsuid = current_fsuid(); kuid_t fsuid = current_fsuid();
...@@ -3127,18 +2931,14 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m ...@@ -3127,18 +2931,14 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m
* *
* Returns an error code otherwise. * Returns an error code otherwise.
*/ */
static int atomic_open(struct nameidata *nd, struct dentry *dentry, static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry,
struct path *path, struct file *file, struct file *file,
const struct open_flags *op,
int open_flag, umode_t mode) int open_flag, umode_t mode)
{ {
struct dentry *const DENTRY_NOT_SET = (void *) -1UL; struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
struct inode *dir = nd->path.dentry->d_inode; struct inode *dir = nd->path.dentry->d_inode;
int error; int error;
if (!(~open_flag & (O_EXCL | O_CREAT))) /* both O_EXCL and O_CREAT */
open_flag &= ~O_TRUNC;
if (nd->flags & LOOKUP_DIRECTORY) if (nd->flags & LOOKUP_DIRECTORY)
open_flag |= O_DIRECTORY; open_flag |= O_DIRECTORY;
...@@ -3149,19 +2949,10 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, ...@@ -3149,19 +2949,10 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
d_lookup_done(dentry); d_lookup_done(dentry);
if (!error) { if (!error) {
if (file->f_mode & FMODE_OPENED) { if (file->f_mode & FMODE_OPENED) {
/* if (unlikely(dentry != file->f_path.dentry)) {
* We didn't have the inode before the open, so check open dput(dentry);
* permission here. dentry = dget(file->f_path.dentry);
*/
int acc_mode = op->acc_mode;
if (file->f_mode & FMODE_CREATED) {
WARN_ON(!(open_flag & O_CREAT));
fsnotify_create(dir, dentry);
acc_mode = 0;
} }
error = may_open(&file->f_path, acc_mode, open_flag);
if (WARN_ON(error > 0))
error = -EINVAL;
} else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { } else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
error = -EIO; error = -EIO;
} else { } else {
...@@ -3169,19 +2960,15 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, ...@@ -3169,19 +2960,15 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
dput(dentry); dput(dentry);
dentry = file->f_path.dentry; dentry = file->f_path.dentry;
} }
if (file->f_mode & FMODE_CREATED) if (unlikely(d_is_negative(dentry)))
fsnotify_create(dir, dentry);
if (unlikely(d_is_negative(dentry))) {
error = -ENOENT; error = -ENOENT;
} else {
path->dentry = dentry;
path->mnt = nd->path.mnt;
return 0;
}
} }
} }
if (error) {
dput(dentry); dput(dentry);
return error; dentry = ERR_PTR(error);
}
return dentry;
} }
/* /*
...@@ -3199,8 +2986,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, ...@@ -3199,8 +2986,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
* *
* An error code is returned on failure. * An error code is returned on failure.
*/ */
static int lookup_open(struct nameidata *nd, struct path *path, static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
struct file *file,
const struct open_flags *op, const struct open_flags *op,
bool got_write) bool got_write)
{ {
...@@ -3213,7 +2999,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, ...@@ -3213,7 +2999,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
if (unlikely(IS_DEADDIR(dir_inode))) if (unlikely(IS_DEADDIR(dir_inode)))
return -ENOENT; return ERR_PTR(-ENOENT);
file->f_mode &= ~FMODE_CREATED; file->f_mode &= ~FMODE_CREATED;
dentry = d_lookup(dir, &nd->last); dentry = d_lookup(dir, &nd->last);
...@@ -3221,7 +3007,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, ...@@ -3221,7 +3007,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
if (!dentry) { if (!dentry) {
dentry = d_alloc_parallel(dir, &nd->last, &wq); dentry = d_alloc_parallel(dir, &nd->last, &wq);
if (IS_ERR(dentry)) if (IS_ERR(dentry))
return PTR_ERR(dentry); return dentry;
} }
if (d_in_lookup(dentry)) if (d_in_lookup(dentry))
break; break;
...@@ -3237,7 +3023,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, ...@@ -3237,7 +3023,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
} }
if (dentry->d_inode) { if (dentry->d_inode) {
/* Cached positive dentry: will open in f_op->open */ /* Cached positive dentry: will open in f_op->open */
goto out_no_open; return dentry;
} }
/* /*
...@@ -3249,41 +3035,27 @@ static int lookup_open(struct nameidata *nd, struct path *path, ...@@ -3249,41 +3035,27 @@ static int lookup_open(struct nameidata *nd, struct path *path,
* Another problem is returing the "right" error value (e.g. for an * Another problem is returing the "right" error value (e.g. for an
* O_EXCL open we want to return EEXIST not EROFS). * O_EXCL open we want to return EEXIST not EROFS).
*/ */
if (unlikely(!got_write))
open_flag &= ~O_TRUNC;
if (open_flag & O_CREAT) { if (open_flag & O_CREAT) {
if (open_flag & O_EXCL)
open_flag &= ~O_TRUNC;
if (!IS_POSIXACL(dir->d_inode)) if (!IS_POSIXACL(dir->d_inode))
mode &= ~current_umask(); mode &= ~current_umask();
if (unlikely(!got_write)) { if (likely(got_write))
create_error = -EROFS;
open_flag &= ~O_CREAT;
if (open_flag & (O_EXCL | O_TRUNC))
goto no_open;
/* No side effects, safe to clear O_CREAT */
} else {
create_error = may_o_create(&nd->path, dentry, mode); create_error = may_o_create(&nd->path, dentry, mode);
if (create_error) { else
open_flag &= ~O_CREAT; create_error = -EROFS;
if (open_flag & O_EXCL)
goto no_open;
}
}
} else if ((open_flag & (O_TRUNC|O_WRONLY|O_RDWR)) &&
unlikely(!got_write)) {
/*
* No O_CREATE -> atomicity not a requirement -> fall
* back to lookup + open
*/
goto no_open;
} }
if (create_error)
open_flag &= ~O_CREAT;
if (dir_inode->i_op->atomic_open) { if (dir_inode->i_op->atomic_open) {
error = atomic_open(nd, dentry, path, file, op, open_flag, dentry = atomic_open(nd, dentry, file, open_flag, mode);
mode); if (unlikely(create_error) && dentry == ERR_PTR(-ENOENT))
if (unlikely(error == -ENOENT) && create_error) dentry = ERR_PTR(create_error);
error = create_error; return dentry;
return error;
} }
no_open:
if (d_in_lookup(dentry)) { if (d_in_lookup(dentry)) {
struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry, struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry,
nd->flags); nd->flags);
...@@ -3310,78 +3082,60 @@ static int lookup_open(struct nameidata *nd, struct path *path, ...@@ -3310,78 +3082,60 @@ static int lookup_open(struct nameidata *nd, struct path *path,
open_flag & O_EXCL); open_flag & O_EXCL);
if (error) if (error)
goto out_dput; goto out_dput;
fsnotify_create(dir_inode, dentry);
} }
if (unlikely(create_error) && !dentry->d_inode) { if (unlikely(create_error) && !dentry->d_inode) {
error = create_error; error = create_error;
goto out_dput; goto out_dput;
} }
out_no_open: return dentry;
path->dentry = dentry;
path->mnt = nd->path.mnt;
return 0;
out_dput: out_dput:
dput(dentry); dput(dentry);
return error; return ERR_PTR(error);
} }
/* static const char *open_last_lookups(struct nameidata *nd,
* Handle the last step of open()
*/
static int do_last(struct nameidata *nd,
struct file *file, const struct open_flags *op) struct file *file, const struct open_flags *op)
{ {
struct dentry *dir = nd->path.dentry; struct dentry *dir = nd->path.dentry;
kuid_t dir_uid = nd->inode->i_uid;
umode_t dir_mode = nd->inode->i_mode;
int open_flag = op->open_flag; int open_flag = op->open_flag;
bool will_truncate = (open_flag & O_TRUNC) != 0;
bool got_write = false; bool got_write = false;
int acc_mode = op->acc_mode;
unsigned seq; unsigned seq;
struct inode *inode; struct inode *inode;
struct path path; struct dentry *dentry;
const char *res;
int error; int error;
nd->flags &= ~LOOKUP_PARENT;
nd->flags |= op->intent; nd->flags |= op->intent;
if (nd->last_type != LAST_NORM) { if (nd->last_type != LAST_NORM) {
error = handle_dots(nd, nd->last_type); if (nd->depth)
if (unlikely(error)) put_link(nd);
return error; return handle_dots(nd, nd->last_type);
goto finish_open;
} }
if (!(open_flag & O_CREAT)) { if (!(open_flag & O_CREAT)) {
if (nd->last.name[nd->last.len]) if (nd->last.name[nd->last.len])
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
/* we _can_ be in RCU mode here */ /* we _can_ be in RCU mode here */
error = lookup_fast(nd, &path, &inode, &seq); dentry = lookup_fast(nd, &inode, &seq);
if (likely(error > 0)) if (IS_ERR(dentry))
return ERR_CAST(dentry);
if (likely(dentry))
goto finish_lookup; goto finish_lookup;
if (error < 0)
return error;
BUG_ON(nd->inode != dir->d_inode);
BUG_ON(nd->flags & LOOKUP_RCU); BUG_ON(nd->flags & LOOKUP_RCU);
} else { } else {
/* create side of things */ /* create side of things */
/* if (nd->flags & LOOKUP_RCU) {
* This will *only* deal with leaving RCU mode - LOOKUP_JUMPED error = unlazy_walk(nd);
* has been cleared when we got to the last component we are if (unlikely(error))
* about to look up return ERR_PTR(error);
*/ }
error = complete_walk(nd);
if (error)
return error;
audit_inode(nd->name, dir, AUDIT_INODE_PARENT); audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
/* trailing slashes? */ /* trailing slashes? */
if (unlikely(nd->last.name[nd->last.len])) if (unlikely(nd->last.name[nd->last.len]))
return -EISDIR; return ERR_PTR(-EISDIR);
} }
if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
...@@ -3398,108 +3152,90 @@ static int do_last(struct nameidata *nd, ...@@ -3398,108 +3152,90 @@ static int do_last(struct nameidata *nd,
inode_lock(dir->d_inode); inode_lock(dir->d_inode);
else else
inode_lock_shared(dir->d_inode); inode_lock_shared(dir->d_inode);
error = lookup_open(nd, &path, file, op, got_write); dentry = lookup_open(nd, file, op, got_write);
if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED))
fsnotify_create(dir->d_inode, dentry);
if (open_flag & O_CREAT) if (open_flag & O_CREAT)
inode_unlock(dir->d_inode); inode_unlock(dir->d_inode);
else else
inode_unlock_shared(dir->d_inode); inode_unlock_shared(dir->d_inode);
if (error) if (got_write)
goto out; mnt_drop_write(nd->path.mnt);
if (file->f_mode & FMODE_OPENED) {
if ((file->f_mode & FMODE_CREATED) ||
!S_ISREG(file_inode(file)->i_mode))
will_truncate = false;
audit_inode(nd->name, file->f_path.dentry, 0);
goto opened;
}
if (file->f_mode & FMODE_CREATED) { if (IS_ERR(dentry))
/* Don't check for write permission, don't truncate */ return ERR_CAST(dentry);
open_flag &= ~O_TRUNC;
will_truncate = false;
acc_mode = 0;
path_to_nameidata(&path, nd);
goto finish_open_created;
}
/* if (file->f_mode & (FMODE_OPENED | FMODE_CREATED)) {
* If atomic_open() acquired write access it is dropped now due to dput(nd->path.dentry);
* possible mount and symlink following (this might be optimized away if nd->path.dentry = dentry;
* necessary...) return NULL;
*/
if (got_write) {
mnt_drop_write(nd->path.mnt);
got_write = false;
} }
error = follow_managed(&path, nd); finish_lookup:
if (unlikely(error < 0)) if (nd->depth)
return error; put_link(nd);
res = step_into(nd, WALK_TRAILING, dentry, inode, seq);
if (unlikely(res))
nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
return res;
}
/* /*
* create/update audit record if it already exists. * Handle the last step of open()
*/ */
audit_inode(nd->name, path.dentry, 0); static int do_open(struct nameidata *nd,
struct file *file, const struct open_flags *op)
if (unlikely((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))) { {
path_to_nameidata(&path, nd); int open_flag = op->open_flag;
return -EEXIST; bool do_truncate;
} int acc_mode;
int error;
seq = 0; /* out of RCU mode, so the value doesn't matter */ if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
inode = d_backing_inode(path.dentry);
finish_lookup:
error = step_into(nd, &path, 0, inode, seq);
if (unlikely(error))
return error;
finish_open:
/* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
error = complete_walk(nd); error = complete_walk(nd);
if (error) if (error)
return error; return error;
}
if (!(file->f_mode & FMODE_CREATED))
audit_inode(nd->name, nd->path.dentry, 0); audit_inode(nd->name, nd->path.dentry, 0);
if (open_flag & O_CREAT) { if (open_flag & O_CREAT) {
error = -EISDIR; if ((open_flag & O_EXCL) && !(file->f_mode & FMODE_CREATED))
return -EEXIST;
if (d_is_dir(nd->path.dentry)) if (d_is_dir(nd->path.dentry))
goto out; return -EISDIR;
error = may_create_in_sticky(dir_mode, dir_uid, error = may_create_in_sticky(nd->dir_mode, nd->dir_uid,
d_backing_inode(nd->path.dentry)); d_backing_inode(nd->path.dentry));
if (unlikely(error)) if (unlikely(error))
goto out; return error;
} }
error = -ENOTDIR;
if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry)) if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
goto out; return -ENOTDIR;
if (!d_is_reg(nd->path.dentry))
will_truncate = false;
if (will_truncate) { do_truncate = false;
acc_mode = op->acc_mode;
if (file->f_mode & FMODE_CREATED) {
/* Don't check for write permission, don't truncate */
open_flag &= ~O_TRUNC;
acc_mode = 0;
} else if (d_is_reg(nd->path.dentry) && open_flag & O_TRUNC) {
error = mnt_want_write(nd->path.mnt); error = mnt_want_write(nd->path.mnt);
if (error) if (error)
goto out; return error;
got_write = true; do_truncate = true;
} }
finish_open_created:
error = may_open(&nd->path, acc_mode, open_flag); error = may_open(&nd->path, acc_mode, open_flag);
if (error) if (!error && !(file->f_mode & FMODE_OPENED))
goto out;
BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
error = vfs_open(&nd->path, file); error = vfs_open(&nd->path, file);
if (error) if (!error)
goto out;
opened:
error = ima_file_check(file, op->acc_mode); error = ima_file_check(file, op->acc_mode);
if (!error && will_truncate) if (!error && do_truncate)
error = handle_truncate(file); error = handle_truncate(file);
out:
if (unlikely(error > 0)) { if (unlikely(error > 0)) {
WARN_ON(1); WARN_ON(1);
error = -EINVAL; error = -EINVAL;
} }
if (got_write) if (do_truncate)
mnt_drop_write(nd->path.mnt); mnt_drop_write(nd->path.mnt);
return error; return error;
} }
...@@ -3604,10 +3340,10 @@ static struct file *path_openat(struct nameidata *nd, ...@@ -3604,10 +3340,10 @@ static struct file *path_openat(struct nameidata *nd,
} else { } else {
const char *s = path_init(nd, flags); const char *s = path_init(nd, flags);
while (!(error = link_path_walk(s, nd)) && while (!(error = link_path_walk(s, nd)) &&
(error = do_last(nd, file, op)) > 0) { (s = open_last_lookups(nd, file, op)) != NULL)
nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); ;
s = trailing_symlink(nd); if (!error)
} error = do_open(nd, file, op);
terminate_walk(nd); terminate_walk(nd);
} }
if (likely(!error)) { if (likely(!error)) {
......
...@@ -1669,7 +1669,7 @@ int ksys_umount(char __user *name, int flags) ...@@ -1669,7 +1669,7 @@ int ksys_umount(char __user *name, int flags)
struct path path; struct path path;
struct mount *mnt; struct mount *mnt;
int retval; int retval;
int lookup_flags = 0; int lookup_flags = LOOKUP_MOUNTPOINT;
if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
return -EINVAL; return -EINVAL;
...@@ -1680,7 +1680,7 @@ int ksys_umount(char __user *name, int flags) ...@@ -1680,7 +1680,7 @@ int ksys_umount(char __user *name, int flags)
if (!(flags & UMOUNT_NOFOLLOW)) if (!(flags & UMOUNT_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW; lookup_flags |= LOOKUP_FOLLOW;
retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path); retval = user_path_at(AT_FDCWD, name, lookup_flags, &path);
if (retval) if (retval)
goto out; goto out;
mnt = real_mount(path.mnt); mnt = real_mount(path.mnt);
...@@ -2697,45 +2697,32 @@ static int do_move_mount_old(struct path *path, const char *old_name) ...@@ -2697,45 +2697,32 @@ static int do_move_mount_old(struct path *path, const char *old_name)
/* /*
* add a mount into a namespace's mount tree * add a mount into a namespace's mount tree
*/ */
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) static int do_add_mount(struct mount *newmnt, struct mountpoint *mp,
struct path *path, int mnt_flags)
{ {
struct mountpoint *mp; struct mount *parent = real_mount(path->mnt);
struct mount *parent;
int err;
mnt_flags &= ~MNT_INTERNAL_FLAGS; mnt_flags &= ~MNT_INTERNAL_FLAGS;
mp = lock_mount(path);
if (IS_ERR(mp))
return PTR_ERR(mp);
parent = real_mount(path->mnt);
err = -EINVAL;
if (unlikely(!check_mnt(parent))) { if (unlikely(!check_mnt(parent))) {
/* that's acceptable only for automounts done in private ns */ /* that's acceptable only for automounts done in private ns */
if (!(mnt_flags & MNT_SHRINKABLE)) if (!(mnt_flags & MNT_SHRINKABLE))
goto unlock; return -EINVAL;
/* ... and for those we'd better have mountpoint still alive */ /* ... and for those we'd better have mountpoint still alive */
if (!parent->mnt_ns) if (!parent->mnt_ns)
goto unlock; return -EINVAL;
} }
/* Refuse the same filesystem on the same mount point */ /* Refuse the same filesystem on the same mount point */
err = -EBUSY;
if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb && if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
path->mnt->mnt_root == path->dentry) path->mnt->mnt_root == path->dentry)
goto unlock; return -EBUSY;
err = -EINVAL;
if (d_is_symlink(newmnt->mnt.mnt_root)) if (d_is_symlink(newmnt->mnt.mnt_root))
goto unlock; return -EINVAL;
newmnt->mnt.mnt_flags = mnt_flags; newmnt->mnt.mnt_flags = mnt_flags;
err = graft_tree(newmnt, parent, mp); return graft_tree(newmnt, parent, mp);
unlock:
unlock_mount(mp);
return err;
} }
static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags); static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
...@@ -2748,6 +2735,7 @@ static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint, ...@@ -2748,6 +2735,7 @@ static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
unsigned int mnt_flags) unsigned int mnt_flags)
{ {
struct vfsmount *mnt; struct vfsmount *mnt;
struct mountpoint *mp;
struct super_block *sb = fc->root->d_sb; struct super_block *sb = fc->root->d_sb;
int error; int error;
...@@ -2768,7 +2756,13 @@ static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint, ...@@ -2768,7 +2756,13 @@ static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
mnt_warn_timestamp_expiry(mountpoint, mnt); mnt_warn_timestamp_expiry(mountpoint, mnt);
error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags); mp = lock_mount(mountpoint);
if (IS_ERR(mp)) {
mntput(mnt);
return PTR_ERR(mp);
}
error = do_add_mount(real_mount(mnt), mp, mountpoint, mnt_flags);
unlock_mount(mp);
if (error < 0) if (error < 0)
mntput(mnt); mntput(mnt);
return error; return error;
...@@ -2829,23 +2823,63 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags, ...@@ -2829,23 +2823,63 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
int finish_automount(struct vfsmount *m, struct path *path) int finish_automount(struct vfsmount *m, struct path *path)
{ {
struct mount *mnt = real_mount(m); struct dentry *dentry = path->dentry;
struct mountpoint *mp;
struct mount *mnt;
int err; int err;
if (!m)
return 0;
if (IS_ERR(m))
return PTR_ERR(m);
mnt = real_mount(m);
/* The new mount record should have at least 2 refs to prevent it being /* The new mount record should have at least 2 refs to prevent it being
* expired before we get a chance to add it * expired before we get a chance to add it
*/ */
BUG_ON(mnt_get_count(mnt) < 2); BUG_ON(mnt_get_count(mnt) < 2);
if (m->mnt_sb == path->mnt->mnt_sb && if (m->mnt_sb == path->mnt->mnt_sb &&
m->mnt_root == path->dentry) { m->mnt_root == dentry) {
err = -ELOOP; err = -ELOOP;
goto fail; goto discard;
} }
err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE); /*
if (!err) * we don't want to use lock_mount() - in this case finding something
* that overmounts our mountpoint-to-be means "quietly drop what we've
* got", not "try to mount it on top".
*/
inode_lock(dentry->d_inode);
namespace_lock();
if (unlikely(cant_mount(dentry))) {
err = -ENOENT;
goto discard_locked;
}
rcu_read_lock();
if (unlikely(__lookup_mnt(path->mnt, dentry))) {
rcu_read_unlock();
err = 0;
goto discard_locked;
}
rcu_read_unlock();
mp = get_mountpoint(dentry);
if (IS_ERR(mp)) {
err = PTR_ERR(mp);
goto discard_locked;
}
err = do_add_mount(mnt, mp, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
unlock_mount(mp);
if (unlikely(err))
goto discard;
mntput(m);
return 0; return 0;
fail:
discard_locked:
namespace_unlock();
inode_unlock(dentry->d_inode);
discard:
/* remove m from any expiration list it may be on */ /* remove m from any expiration list it may be on */
if (!list_empty(&mnt->mnt_expire)) { if (!list_empty(&mnt->mnt_expire)) {
namespace_lock(); namespace_lock();
......
...@@ -1046,8 +1046,10 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) ...@@ -1046,8 +1046,10 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
if (flags & O_CREAT) { if (flags & O_CREAT) {
op->intent |= LOOKUP_CREATE; op->intent |= LOOKUP_CREATE;
if (flags & O_EXCL) if (flags & O_EXCL) {
op->intent |= LOOKUP_EXCL; op->intent |= LOOKUP_EXCL;
flags |= O_NOFOLLOW;
}
} }
if (flags & O_DIRECTORY) if (flags & O_DIRECTORY)
......
...@@ -15,7 +15,7 @@ enum { MAX_NESTED_LINKS = 8 }; ...@@ -15,7 +15,7 @@ enum { MAX_NESTED_LINKS = 8 };
/* /*
* Type of the last component on LOOKUP_PARENT * Type of the last component on LOOKUP_PARENT
*/ */
enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
/* pathwalk mode */ /* pathwalk mode */
#define LOOKUP_FOLLOW 0x0001 /* follow links at the end */ #define LOOKUP_FOLLOW 0x0001 /* follow links at the end */
...@@ -23,6 +23,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; ...@@ -23,6 +23,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
#define LOOKUP_AUTOMOUNT 0x0004 /* force terminal automount */ #define LOOKUP_AUTOMOUNT 0x0004 /* force terminal automount */
#define LOOKUP_EMPTY 0x4000 /* accept empty path [user_... only] */ #define LOOKUP_EMPTY 0x4000 /* accept empty path [user_... only] */
#define LOOKUP_DOWN 0x8000 /* follow mounts in the starting point */ #define LOOKUP_DOWN 0x8000 /* follow mounts in the starting point */
#define LOOKUP_MOUNTPOINT 0x0080 /* follow mounts in the end */
#define LOOKUP_REVAL 0x0020 /* tell ->d_revalidate() to trust no cache */ #define LOOKUP_REVAL 0x0020 /* tell ->d_revalidate() to trust no cache */
#define LOOKUP_RCU 0x0040 /* RCU pathwalk mode; semi-internal */ #define LOOKUP_RCU 0x0040 /* RCU pathwalk mode; semi-internal */
...@@ -64,7 +65,6 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne ...@@ -64,7 +65,6 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne
extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int);
extern void done_path_create(struct path *, struct dentry *); extern void done_path_create(struct path *, struct dentry *);
extern struct dentry *kern_path_locked(const char *, struct path *); extern struct dentry *kern_path_locked(const char *, struct path *);
extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int);
extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int); extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int);
extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment