Commit 9c577491 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'work.dotdot1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs pathwalk sanitizing from Al Viro:
 "Massive pathwalk rewrite and cleanups.

  Several iterations have been posted; hopefully this thing is getting
  readable and understandable now. Pretty much all parts of pathname
  resolutions are affected...

  The branch is identical to what has sat in -next, except for commit
  message in "lift all calls of step_into() out of follow_dotdot/
  follow_dotdot_rcu", crediting Qian Cai for reporting the bug; only
  commit message changed there."

* 'work.dotdot1' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (69 commits)
  lookup_open(): don't bother with fallbacks to lookup+create
  atomic_open(): no need to pass struct open_flags anymore
  open_last_lookups(): move complete_walk() into do_open()
  open_last_lookups(): lift O_EXCL|O_CREAT handling into do_open()
  open_last_lookups(): don't abuse complete_walk() when all we want is unlazy
  open_last_lookups(): consolidate fsnotify_create() calls
  take post-lookup part of do_last() out of loop
  link_path_walk(): sample parent's i_uid and i_mode for the last component
  __nd_alloc_stack(): make it return bool
  reserve_stack(): switch to __nd_alloc_stack()
  pick_link(): take reserving space on stack into a new helper
  pick_link(): more straightforward handling of allocation failures
  fold path_to_nameidata() into its only remaining caller
  pick_link(): pass it struct path already with normal refcounting rules
  fs/namei.c: kill follow_mount()
  non-RCU analogue of the previous commit
  helper for mount rootwards traversal
  follow_dotdot(): be lazy about changing nd->path
  follow_dotdot_rcu(): be lazy about changing nd->path
  follow_dotdot{,_rcu}(): massage loops
  ...
parents d987ca1c 99a4a90c
......@@ -404,11 +404,8 @@ that is the "next" component in the pathname.
``int last_type``
~~~~~~~~~~~~~~~~~
This is one of ``LAST_NORM``, ``LAST_ROOT``, ``LAST_DOT``, ``LAST_DOTDOT``, or
``LAST_BIND``. The ``last`` field is only valid if the type is
``LAST_NORM``. ``LAST_BIND`` is used when following a symlink and no
components of the symlink have been processed yet. Others should be
fairly self-explanatory.
This is one of ``LAST_NORM``, ``LAST_ROOT``, ``LAST_DOT`` or ``LAST_DOTDOT``.
The ``last`` field is only valid if the type is ``LAST_NORM``.
``struct path root``
~~~~~~~~~~~~~~~~~~~~
......
......@@ -186,7 +186,7 @@ static int find_autofs_mount(const char *pathname,
struct path path;
int err;
err = kern_path_mountpoint(AT_FDCWD, pathname, &path, 0);
err = kern_path(pathname, LOOKUP_MOUNTPOINT, &path);
if (err)
return err;
err = -ENOENT;
......@@ -519,8 +519,8 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
if (!fp || param->ioctlfd == -1) {
if (autofs_type_any(type))
err = kern_path_mountpoint(AT_FDCWD,
name, &path, LOOKUP_FOLLOW);
err = kern_path(name, LOOKUP_FOLLOW | LOOKUP_MOUNTPOINT,
&path);
else
err = find_autofs_mount(name, &path,
test_by_type, &type);
......
......@@ -60,7 +60,6 @@ extern int finish_clean_context(struct fs_context *fc);
*/
extern int filename_lookup(int dfd, struct filename *name, unsigned flags,
struct path *path, struct path *root);
extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *);
extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
const char *, unsigned int, struct path *);
long do_mknodat(int dfd, const char __user *filename, umode_t mode,
......
......@@ -503,9 +503,10 @@ struct nameidata {
} *stack, internal[EMBEDDED_LEVELS];
struct filename *name;
struct nameidata *saved;
struct inode *link_inode;
unsigned root_seq;
int dfd;
kuid_t dir_uid;
umode_t dir_mode;
} __randomize_layout;
static void set_nameidata(struct nameidata *p, int dfd, struct filename *name)
......@@ -530,52 +531,34 @@ static void restore_nameidata(void)
kfree(now->stack);
}
static int __nd_alloc_stack(struct nameidata *nd)
static bool nd_alloc_stack(struct nameidata *nd)
{
struct saved *p;
if (nd->flags & LOOKUP_RCU) {
p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved),
GFP_ATOMIC);
if (unlikely(!p))
return -ECHILD;
} else {
p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved),
GFP_KERNEL);
if (unlikely(!p))
return -ENOMEM;
}
p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved),
nd->flags & LOOKUP_RCU ? GFP_ATOMIC : GFP_KERNEL);
if (unlikely(!p))
return false;
memcpy(p, nd->internal, sizeof(nd->internal));
nd->stack = p;
return 0;
return true;
}
/**
* path_connected - Verify that a path->dentry is below path->mnt.mnt_root
* @path: nameidate to verify
* path_connected - Verify that a dentry is below mnt.mnt_root
*
* Rename can sometimes move a file or directory outside of a bind
* mount, path_connected allows those cases to be detected.
*/
static bool path_connected(const struct path *path)
static bool path_connected(struct vfsmount *mnt, struct dentry *dentry)
{
struct vfsmount *mnt = path->mnt;
struct super_block *sb = mnt->mnt_sb;
/* Bind mounts and multi-root filesystems can have disconnected paths */
if (!(sb->s_iflags & SB_I_MULTIROOT) && (mnt->mnt_root == sb->s_root))
return true;
return is_subdir(path->dentry, mnt->mnt_root);
}
static inline int nd_alloc_stack(struct nameidata *nd)
{
if (likely(nd->depth != EMBEDDED_LEVELS))
return 0;
if (likely(nd->stack != nd->internal))
return 0;
return __nd_alloc_stack(nd);
return is_subdir(dentry, mnt->mnt_root);
}
static void drop_links(struct nameidata *nd)
......@@ -608,10 +591,9 @@ static void terminate_walk(struct nameidata *nd)
}
/* path_put is needed afterwards regardless of success or failure */
static bool legitimize_path(struct nameidata *nd,
struct path *path, unsigned seq)
static bool __legitimize_path(struct path *path, unsigned seq, unsigned mseq)
{
int res = __legitimize_mnt(path->mnt, nd->m_seq);
int res = __legitimize_mnt(path->mnt, mseq);
if (unlikely(res)) {
if (res > 0)
path->mnt = NULL;
......@@ -625,6 +607,12 @@ static bool legitimize_path(struct nameidata *nd,
return !read_seqcount_retry(&path->dentry->d_seq, seq);
}
/*
 * Try to turn *path, sampled locklessly during the walk described by @nd,
 * into a properly referenced path.
 *
 * @nd:   walk state; supplies the mount_lock sequence number (nd->m_seq)
 * @path: mount/dentry pair to legitimize
 * @seq:  ->d_seq value sampled for path->dentry
 *
 * __legitimize_path() takes the dentry sequence number first and the mount
 * sequence number second; passing them the other way round compiles cleanly
 * (both are unsigned) but validates each object against the wrong counter.
 *
 * Returns true if both the mount and the dentry were successfully grabbed
 * and are still valid.  path_put() is needed afterwards regardless of
 * success or failure.
 */
static inline bool legitimize_path(struct nameidata *nd,
			    struct path *path, unsigned seq)
{
	return __legitimize_path(path, seq, nd->m_seq);
}
static bool legitimize_links(struct nameidata *nd)
{
int i;
......@@ -858,25 +846,6 @@ static int set_root(struct nameidata *nd)
return 0;
}
/*
 * Release *path: the dentry reference is always dropped, the mount
 * reference only when path->mnt differs from the mount nd->path refers to.
 */
static void path_put_conditional(struct path *path, struct nameidata *nd)
{
	struct vfsmount *mnt = path->mnt;

	dput(path->dentry);
	if (mnt == nd->path.mnt)
		return;
	mntput(mnt);
}
static inline void path_to_nameidata(const struct path *path,
struct nameidata *nd)
{
if (!(nd->flags & LOOKUP_RCU)) {
dput(nd->path.dentry);
if (nd->path.mnt != path->mnt)
mntput(nd->path.mnt);
}
nd->path.mnt = path->mnt;
nd->path.dentry = path->dentry;
}
static int nd_jump_root(struct nameidata *nd)
{
if (unlikely(nd->flags & LOOKUP_BENEATH))
......@@ -969,28 +938,21 @@ int sysctl_protected_regular __read_mostly;
*
* Returns 0 if following the symlink is allowed, -ve on error.
*/
static inline int may_follow_link(struct nameidata *nd)
static inline int may_follow_link(struct nameidata *nd, const struct inode *inode)
{
const struct inode *inode;
const struct inode *parent;
kuid_t puid;
if (!sysctl_protected_symlinks)
return 0;
/* Allowed if owner and follower match. */
inode = nd->link_inode;
if (uid_eq(current_cred()->fsuid, inode->i_uid))
return 0;
/* Allowed if parent directory not sticky and world-writable. */
parent = nd->inode;
if ((parent->i_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
if ((nd->dir_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
return 0;
/* Allowed if parent directory and link owner match. */
puid = parent->i_uid;
if (uid_valid(puid) && uid_eq(puid, inode->i_uid))
if (uid_valid(nd->dir_uid) && uid_eq(nd->dir_uid, inode->i_uid))
return 0;
if (nd->flags & LOOKUP_RCU)
......@@ -1113,63 +1075,6 @@ static int may_create_in_sticky(umode_t dir_mode, kuid_t dir_uid,
return 0;
}
/*
 * get_link - fetch the body of the symlink on top of the link stack
 * @nd: walk state; the link is nd->stack[nd->depth - 1], its inode is
 *      nd->link_inode
 *
 * Returns a pointer to the text still to be walked, NULL when nothing is
 * left to walk (->get_link() returned NULL, or the body was empty or
 * consisted only of slashes), or an ERR_PTR():
 *   -ELOOP  if LOOKUP_NO_SYMLINKS is set,
 *   -ECHILD if leaving RCU mode failed,
 *   whatever security_inode_follow_link(), ->get_link() or nd_jump_root()
 *   returned.
 */
static __always_inline
const char *get_link(struct nameidata *nd)
{
struct saved *last = nd->stack + nd->depth - 1;
struct dentry *dentry = last->link.dentry;
struct inode *inode = nd->link_inode;
int error;
const char *res;
if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS))
return ERR_PTR(-ELOOP);
/*
 * atime update: done directly outside of RCU mode; in RCU mode we leave
 * RCU mode first, and only when an update is actually needed.
 */
if (!(nd->flags & LOOKUP_RCU)) {
touch_atime(&last->link);
cond_resched();
} else if (atime_needs_update(&last->link, inode)) {
if (unlikely(unlazy_walk(nd)))
return ERR_PTR(-ECHILD);
touch_atime(&last->link);
}
error = security_inode_follow_link(dentry, inode,
nd->flags & LOOKUP_RCU);
if (unlikely(error))
return ERR_PTR(error);
nd->last_type = LAST_BIND;
/* use the body cached in ->i_link if there is one, else ask the fs */
res = READ_ONCE(inode->i_link);
if (!res) {
const char * (*get)(struct dentry *, struct inode *,
struct delayed_call *);
get = inode->i_op->get_link;
if (nd->flags & LOOKUP_RCU) {
/* RCU attempt passes a NULL dentry; -ECHILD means "retry outside of RCU mode" */
res = get(NULL, inode, &last->done);
if (res == ERR_PTR(-ECHILD)) {
if (unlikely(unlazy_walk(nd)))
return ERR_PTR(-ECHILD);
res = get(dentry, inode, &last->done);
}
} else {
res = get(dentry, inode, &last->done);
}
if (IS_ERR_OR_NULL(res))
return res;
}
/* absolute body: jump to root, then skip all leading slashes */
if (*res == '/') {
error = nd_jump_root(nd);
if (unlikely(error))
return ERR_PTR(error);
while (unlikely(*++res == '/'))
;
}
/* nothing left after the slashes - report that as NULL */
if (!*res)
res = NULL;
return res;
}
/*
* follow_up - Find the mountpoint of path's vfsmount
*
......@@ -1203,19 +1108,59 @@ int follow_up(struct path *path)
}
EXPORT_SYMBOL(follow_up);
/*
 * Climb from mount @m towards the root of the mount tree, looking for the
 * first mountpoint that is not itself the root of the mount it sits on.
 *
 * @m:    mount to start from
 * @root: walk's root; the climb stops (unsuccessfully) when it is reached
 * @path: on success, set to the parent mount and the mountpoint dentry
 * @seqp: on success, set to the ->d_seq sample of that mountpoint
 *
 * No references are taken here.  Returns true if *path and *seqp were
 * filled in, false if we ran out of parents or hit @root.
 */
static bool choose_mountpoint_rcu(struct mount *m, const struct path *root,
				  struct path *path, unsigned *seqp)
{
	struct dentry *mp;

	for (;;) {
		if (!mnt_has_parent(m))
			return false;

		mp = m->mnt_mountpoint;
		m = m->mnt_parent;

		/* reached the walk's root - do not go above it */
		if (unlikely(root->dentry == mp && root->mnt == &m->mnt))
			return false;

		/* mountpoint is the root of the parent mount - keep climbing */
		if (mp == m->mnt.mnt_root)
			continue;

		path->mnt = &m->mnt;
		path->dentry = mp;
		*seqp = read_seqcount_begin(&mp->d_seq);
		return true;
	}
}
/*
 * Reference-taking wrapper around choose_mountpoint_rcu().
 *
 * The search is done under rcu_read_lock() with mount_lock sampled first;
 * a successful search is then converted into real references by
 * __legitimize_path().  The whole thing is repeated until either
 *   - a mountpoint is found and legitimized (returns true, *path set), or
 *   - nothing is found and mount_lock did not change during the search
 *     (returns false).
 */
static bool choose_mountpoint(struct mount *m, const struct path *root,
struct path *path)
{
bool found;
rcu_read_lock();
while (1) {
unsigned seq, mseq = read_seqbegin(&mount_lock);
found = choose_mountpoint_rcu(m, root, path, &seq);
if (unlikely(!found)) {
/* "not found" can be trusted only if mount_lock was stable */
if (!read_seqretry(&mount_lock, mseq))
break;
} else {
if (likely(__legitimize_path(path, seq, mseq)))
break;
/*
 * Legitimizing failed; path_put() is needed regardless, and it is
 * done with the RCU read lock dropped before retrying.
 */
rcu_read_unlock();
path_put(path);
rcu_read_lock();
}
}
rcu_read_unlock();
return found;
}
/*
* Perform an automount
* - return -EISDIR to tell follow_managed() to stop and return the path we
* were called with.
*/
static int follow_automount(struct path *path, struct nameidata *nd,
bool *need_mntput)
static int follow_automount(struct path *path, int *count, unsigned lookup_flags)
{
struct vfsmount *mnt;
int err;
if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
return -EREMOTE;
struct dentry *dentry = path->dentry;
/* We don't want to mount if someone's just doing a stat -
* unless they're stat'ing a directory and appended a '/' to
......@@ -1228,138 +1173,91 @@ static int follow_automount(struct path *path, struct nameidata *nd,
* as being automount points. These will need the attentions
* of the daemon to instantiate them before they can be used.
*/
if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
if (!(lookup_flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) &&
path->dentry->d_inode)
dentry->d_inode)
return -EISDIR;
nd->total_link_count++;
if (nd->total_link_count >= 40)
if (count && (*count)++ >= MAXSYMLINKS)
return -ELOOP;
mnt = path->dentry->d_op->d_automount(path);
if (IS_ERR(mnt)) {
/*
* The filesystem is allowed to return -EISDIR here to indicate
* it doesn't want to automount. For instance, autofs would do
* this so that its userspace daemon can mount on this dentry.
*
* However, we can only permit this if it's a terminal point in
* the path being looked up; if it wasn't then the remainder of
* the path is inaccessible and we should say so.
*/
if (PTR_ERR(mnt) == -EISDIR && (nd->flags & LOOKUP_PARENT))
return -EREMOTE;
return PTR_ERR(mnt);
}
if (!mnt) /* mount collision */
return 0;
if (!*need_mntput) {
/* lock_mount() may release path->mnt on error */
mntget(path->mnt);
*need_mntput = true;
}
err = finish_automount(mnt, path);
switch (err) {
case -EBUSY:
/* Someone else made a mount here whilst we were busy */
return 0;
case 0:
path_put(path);
path->mnt = mnt;
path->dentry = dget(mnt->mnt_root);
return 0;
default:
return err;
}
return finish_automount(dentry->d_op->d_automount(path), path);
}
/*
* Handle a dentry that is managed in some way.
* - Flagged for transit management (autofs)
* - Flagged as mountpoint
* - Flagged as automount point
*
* This may only be called in refwalk mode.
* On success path->dentry is known positive.
*
* Serialization is taken care of in namespace.c
* mount traversal - out-of-line part. One note on ->d_flags accesses -
* dentries are pinned but not locked here, so negative dentry can go
* positive right under us. Use of smp_load_acquire() provides a barrier
* sufficient for ->d_inode and ->d_flags consistency.
*/
static int follow_managed(struct path *path, struct nameidata *nd)
static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped,
int *count, unsigned lookup_flags)
{
struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
unsigned flags;
struct vfsmount *mnt = path->mnt;
bool need_mntput = false;
int ret = 0;
/* Given that we're not holding a lock here, we retain the value in a
* local variable for each dentry as we look at it so that we don't see
* the components of that value change under us */
while (flags = smp_load_acquire(&path->dentry->d_flags),
unlikely(flags & DCACHE_MANAGED_DENTRY)) {
while (flags & DCACHE_MANAGED_DENTRY) {
/* Allow the filesystem to manage the transit without i_mutex
* being held. */
if (flags & DCACHE_MANAGE_TRANSIT) {
BUG_ON(!path->dentry->d_op);
BUG_ON(!path->dentry->d_op->d_manage);
ret = path->dentry->d_op->d_manage(path, false);
flags = smp_load_acquire(&path->dentry->d_flags);
if (ret < 0)
break;
}
/* Transit to a mounted filesystem. */
if (flags & DCACHE_MOUNTED) {
if (flags & DCACHE_MOUNTED) { // something's mounted on it..
struct vfsmount *mounted = lookup_mnt(path);
if (mounted) {
if (mounted) { // ... in our namespace
dput(path->dentry);
if (need_mntput)
mntput(path->mnt);
path->mnt = mounted;
path->dentry = dget(mounted->mnt_root);
// here we know it's positive
flags = path->dentry->d_flags;
need_mntput = true;
continue;
}
/* Something is mounted on this dentry in another
* namespace and/or whatever was mounted there in this
* namespace got unmounted before lookup_mnt() could
* get it */
}
/* Handle an automount point */
if (flags & DCACHE_NEED_AUTOMOUNT) {
ret = follow_automount(path, nd, &need_mntput);
if (ret < 0)
break;
continue;
}
if (!(flags & DCACHE_NEED_AUTOMOUNT))
break;
/* We didn't change the current path point */
break;
// uncovered automount point
ret = follow_automount(path, count, lookup_flags);
flags = smp_load_acquire(&path->dentry->d_flags);
if (ret < 0)
break;
}
if (need_mntput) {
if (path->mnt == mnt)
mntput(path->mnt);
if (unlikely(nd->flags & LOOKUP_NO_XDEV))
ret = -EXDEV;
else
nd->flags |= LOOKUP_JUMPED;
}
if (ret == -EISDIR || !ret)
ret = 1;
if (ret > 0 && unlikely(d_flags_negative(flags)))
if (ret == -EISDIR)
ret = 0;
// possible if you race with several mount --move
if (need_mntput && path->mnt == mnt)
mntput(path->mnt);
if (!ret && unlikely(d_flags_negative(flags)))
ret = -ENOENT;
if (unlikely(ret < 0))
path_put_conditional(path, nd);
*jumped = need_mntput;
return ret;
}
/*
 * Mount traversal - inline fast path.
 *
 * ->d_flags of path->dentry is sampled once with smp_load_acquire().  If
 * the dentry is not managed there is nothing to cross: *jumped is set to
 * false and the result is -ENOENT for a negative dentry, 0 otherwise.
 * Managed dentries are handed to __traverse_mounts() together with the
 * sampled flags.
 */
static inline int traverse_mounts(struct path *path, bool *jumped,
				  int *count, unsigned lookup_flags)
{
	unsigned d_flags = smp_load_acquire(&path->dentry->d_flags);

	if (unlikely(d_flags & DCACHE_MANAGED_DENTRY))
		return __traverse_mounts(path, d_flags, jumped, count,
					 lookup_flags);

	/* fastpath */
	*jumped = false;
	return unlikely(d_flags_negative(d_flags)) ? -ENOENT : 0;
}
int follow_down_one(struct path *path)
{
struct vfsmount *mounted;
......@@ -1376,11 +1274,22 @@ int follow_down_one(struct path *path)
}
EXPORT_SYMBOL(follow_down_one);
static inline int managed_dentry_rcu(const struct path *path)
/*
* Follow down to the covering mount currently visible to userspace. At each
* point, the filesystem owning that dentry may be queried as to whether the
* caller is permitted to proceed or not.
*/
int follow_down(struct path *path)
{
return (path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) ?
path->dentry->d_op->d_manage(path, true) : 0;
struct vfsmount *mnt = path->mnt;
bool jumped;
int ret = traverse_mounts(path, &jumped, NULL, 0);
if (path->mnt != mnt)
mntput(mnt);
return ret;
}
EXPORT_SYMBOL(follow_down);
/*
* Try to skip to top of mountpoint pile in rcuwalk mode. Fail if
......@@ -1389,204 +1298,88 @@ static inline int managed_dentry_rcu(const struct path *path)
static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
struct inode **inode, unsigned *seqp)
{
struct dentry *dentry = path->dentry;
unsigned int flags = dentry->d_flags;
if (likely(!(flags & DCACHE_MANAGED_DENTRY)))
return true;
if (unlikely(nd->flags & LOOKUP_NO_XDEV))
return false;
for (;;) {
struct mount *mounted;
/*
* Don't forget we might have a non-mountpoint managed dentry
* that wants to block transit.
*/
switch (managed_dentry_rcu(path)) {
case -ECHILD:
default:
return false;
case -EISDIR:
return true;
case 0:
break;
if (unlikely(flags & DCACHE_MANAGE_TRANSIT)) {
int res = dentry->d_op->d_manage(path, true);
if (res)
return res == -EISDIR;
flags = dentry->d_flags;
}
if (!d_mountpoint(path->dentry))
return !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT);
mounted = __lookup_mnt(path->mnt, path->dentry);
if (!mounted)
break;
if (unlikely(nd->flags & LOOKUP_NO_XDEV))
return false;
path->mnt = &mounted->mnt;
path->dentry = mounted->mnt.mnt_root;
nd->flags |= LOOKUP_JUMPED;
*seqp = read_seqcount_begin(&path->dentry->d_seq);
/*
* Update the inode too. We don't need to re-check the
* dentry sequence number here after this d_inode read,
* because a mount-point is always pinned.
*/
*inode = path->dentry->d_inode;
}
return !read_seqretry(&mount_lock, nd->m_seq) &&
!(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT);
}
static int follow_dotdot_rcu(struct nameidata *nd)
{
struct inode *inode = nd->inode;
while (1) {
if (path_equal(&nd->path, &nd->root)) {
if (unlikely(nd->flags & LOOKUP_BENEATH))
return -ECHILD;
break;
}
if (nd->path.dentry != nd->path.mnt->mnt_root) {
struct dentry *old = nd->path.dentry;
struct dentry *parent = old->d_parent;
unsigned seq;
inode = parent->d_inode;
seq = read_seqcount_begin(&parent->d_seq);
if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
return -ECHILD;
nd->path.dentry = parent;
nd->seq = seq;
if (unlikely(!path_connected(&nd->path)))
return -ECHILD;
break;
} else {
struct mount *mnt = real_mount(nd->path.mnt);
struct mount *mparent = mnt->mnt_parent;
struct dentry *mountpoint = mnt->mnt_mountpoint;
struct inode *inode2 = mountpoint->d_inode;
unsigned seq = read_seqcount_begin(&mountpoint->d_seq);
if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
return -ECHILD;
if (&mparent->mnt == nd->path.mnt)
break;
if (unlikely(nd->flags & LOOKUP_NO_XDEV))
return -ECHILD;
/* we know that mountpoint was pinned */
nd->path.dentry = mountpoint;
nd->path.mnt = &mparent->mnt;
inode = inode2;
nd->seq = seq;
if (flags & DCACHE_MOUNTED) {
struct mount *mounted = __lookup_mnt(path->mnt, dentry);
if (mounted) {
path->mnt = &mounted->mnt;
dentry = path->dentry = mounted->mnt.mnt_root;
nd->flags |= LOOKUP_JUMPED;
*seqp = read_seqcount_begin(&dentry->d_seq);
*inode = dentry->d_inode;
/*
* We don't need to re-check ->d_seq after this
* ->d_inode read - there will be an RCU delay
* between mount hash removal and ->mnt_root
* becoming unpinned.
*/
flags = dentry->d_flags;
continue;
}
if (read_seqretry(&mount_lock, nd->m_seq))
return false;
}
return !(flags & DCACHE_NEED_AUTOMOUNT);
}
while (unlikely(d_mountpoint(nd->path.dentry))) {
struct mount *mounted;
mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
return -ECHILD;
if (!mounted)
break;
if (unlikely(nd->flags & LOOKUP_NO_XDEV))
return -ECHILD;
nd->path.mnt = &mounted->mnt;
nd->path.dentry = mounted->mnt.mnt_root;
inode = nd->path.dentry->d_inode;
nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
}
nd->inode = inode;
return 0;
}
/*
* Follow down to the covering mount currently visible to userspace. At each
* point, the filesystem owning that dentry may be queried as to whether the
* caller is permitted to proceed or not.
*/
int follow_down(struct path *path)
static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
struct path *path, struct inode **inode,
unsigned int *seqp)
{
unsigned managed;
bool jumped;
int ret;
while (managed = READ_ONCE(path->dentry->d_flags),
unlikely(managed & DCACHE_MANAGED_DENTRY)) {
/* Allow the filesystem to manage the transit without i_mutex
* being held.
*
* We indicate to the filesystem if someone is trying to mount
* something here. This gives autofs the chance to deny anyone
* other than its daemon the right to mount on its
* superstructure.
*
* The filesystem may sleep at this point.
*/
if (managed & DCACHE_MANAGE_TRANSIT) {
BUG_ON(!path->dentry->d_op);
BUG_ON(!path->dentry->d_op->d_manage);
ret = path->dentry->d_op->d_manage(path, false);
if (ret < 0)
return ret == -EISDIR ? 0 : ret;
}
/* Transit to a mounted filesystem. */
if (managed & DCACHE_MOUNTED) {
struct vfsmount *mounted = lookup_mnt(path);
if (!mounted)
break;
dput(path->dentry);
mntput(path->mnt);
path->mnt = mounted;
path->dentry = dget(mounted->mnt_root);
continue;
}
/* Don't handle automount points here */
break;
}
return 0;
}
EXPORT_SYMBOL(follow_down);
/*
* Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
*/
static void follow_mount(struct path *path)
{
while (d_mountpoint(path->dentry)) {
struct vfsmount *mounted = lookup_mnt(path);
if (!mounted)
break;
dput(path->dentry);
mntput(path->mnt);
path->mnt = mounted;
path->dentry = dget(mounted->mnt_root);
path->mnt = nd->path.mnt;
path->dentry = dentry;
if (nd->flags & LOOKUP_RCU) {
unsigned int seq = *seqp;
if (unlikely(!*inode))
return -ENOENT;
if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
return 0;
if (unlazy_child(nd, dentry, seq))
return -ECHILD;
// *path might've been clobbered by __follow_mount_rcu()
path->mnt = nd->path.mnt;
path->dentry = dentry;
}
}
/*
 * Replace path->dentry with its parent, moving the reference along.
 *
 * Returns 0 on success, -ENOENT if the resulting path fails the
 * path_connected() check.  path->dentry has already been switched to the
 * parent when the error is returned.
 */
static int path_parent_directory(struct path *path)
{
	struct dentry *child = path->dentry;

	/* rare case of legitimate dget_parent()... */
	path->dentry = dget_parent(child);
	dput(child);

	return likely(path_connected(path)) ? 0 : -ENOENT;
}
static int follow_dotdot(struct nameidata *nd)
{
while (1) {
if (path_equal(&nd->path, &nd->root)) {
if (unlikely(nd->flags & LOOKUP_BENEATH))
return -EXDEV;
break;
}
if (nd->path.dentry != nd->path.mnt->mnt_root) {
int ret = path_parent_directory(&nd->path);
if (ret)
return ret;
break;
}
if (!follow_up(&nd->path))
break;
ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
if (jumped) {
if (unlikely(nd->flags & LOOKUP_NO_XDEV))
return -EXDEV;
ret = -EXDEV;
else
nd->flags |= LOOKUP_JUMPED;
}
follow_mount(&nd->path);
nd->inode = nd->path.dentry->d_inode;
return 0;
if (unlikely(ret)) {
dput(path->dentry);
if (path->mnt != nd->path.mnt)
mntput(path->mnt);
} else {
*inode = d_backing_inode(path->dentry);
*seqp = 0; /* out of RCU mode, so the value doesn't matter */
}
return ret;
}
/*
......@@ -1643,14 +1436,12 @@ static struct dentry *__lookup_hash(const struct qstr *name,
return dentry;
}
static int lookup_fast(struct nameidata *nd,
struct path *path, struct inode **inode,
unsigned *seqp)
static struct dentry *lookup_fast(struct nameidata *nd,
struct inode **inode,
unsigned *seqp)
{
struct vfsmount *mnt = nd->path.mnt;
struct dentry *dentry, *parent = nd->path.dentry;
int status = 1;
int err;
/*
* Rename seqlock is not required here because in the off chance
......@@ -1659,12 +1450,11 @@ static int lookup_fast(struct nameidata *nd,
*/
if (nd->flags & LOOKUP_RCU) {
unsigned seq;
bool negative;
dentry = __d_lookup_rcu(parent, &nd->last, &seq);
if (unlikely(!dentry)) {
if (unlazy_walk(nd))
return -ECHILD;
return 0;
return ERR_PTR(-ECHILD);
return NULL;
}
/*
......@@ -1672,9 +1462,8 @@ static int lookup_fast(struct nameidata *nd,
* the dentry name information from lookup.
*/
*inode = d_backing_inode(dentry);
negative = d_is_negative(dentry);
if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
return -ECHILD;
return ERR_PTR(-ECHILD);
/*
* This sequence count validates that the parent had no
......@@ -1684,46 +1473,30 @@ static int lookup_fast(struct nameidata *nd,
* enough, we can use __read_seqcount_retry here.
*/
if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq)))
return -ECHILD;
return ERR_PTR(-ECHILD);
*seqp = seq;
status = d_revalidate(dentry, nd->flags);
if (likely(status > 0)) {
/*
* Note: do negative dentry check after revalidation in
* case that drops it.
*/
if (unlikely(negative))
return -ENOENT;
path->mnt = mnt;
path->dentry = dentry;
if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
return 1;
}
if (likely(status > 0))
return dentry;
if (unlazy_child(nd, dentry, seq))
return -ECHILD;
return ERR_PTR(-ECHILD);
if (unlikely(status == -ECHILD))
/* we'd been told to redo it in non-rcu mode */
status = d_revalidate(dentry, nd->flags);
} else {
dentry = __d_lookup(parent, &nd->last);
if (unlikely(!dentry))
return 0;
return NULL;
status = d_revalidate(dentry, nd->flags);
}
if (unlikely(status <= 0)) {
if (!status)
d_invalidate(dentry);
dput(dentry);
return status;
return ERR_PTR(status);
}
path->mnt = mnt;
path->dentry = dentry;
err = follow_managed(path, nd);
if (likely(err > 0))
*inode = d_backing_inode(path->dentry);
return err;
return dentry;
}
/* Fast lookup failed, do it the slow way */
......@@ -1788,21 +1561,250 @@ static inline int may_lookup(struct nameidata *nd)
return inode_permission(nd->inode, MAY_EXEC);
}
static inline int handle_dots(struct nameidata *nd, int type)
/*
 * Account for one more symlink and make sure the link stack can hold it.
 *
 * @nd:   walk state
 * @link: the symlink about to be pushed; only touched if we have to leave
 *        RCU mode, in which case it is legitimized first
 * @seq:  ->d_seq sample for link->dentry
 *
 * Returns 0 when there is room, -ELOOP when MAXSYMLINKS has been exceeded,
 * -ECHILD when leaving RCU mode (or grabbing @link) failed, -ENOMEM when
 * the stack could not be allocated.
 */
static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
{
	bool link_ok;

	if (unlikely(nd->total_link_count++ >= MAXSYMLINKS))
		return -ELOOP;

	/* embedded stack not full yet, or external stack already in use */
	if (likely(nd->depth != EMBEDDED_LEVELS || nd->stack != nd->internal))
		return 0;

	if (likely(nd_alloc_stack(nd)))
		return 0;

	/* outside of RCU mode there is nothing else to try */
	if (!(nd->flags & LOOKUP_RCU))
		return -ENOMEM;

	// we need to grab link before we do unlazy.  And we can't skip
	// unlazy even if we fail to grab the link - cleanup needs it
	link_ok = legitimize_path(nd, link, seq);
	if (unlazy_walk(nd) != 0 || !link_ok)
		return -ECHILD;

	/* second attempt, now that LOOKUP_RCU is no longer in effect */
	return nd_alloc_stack(nd) ? 0 : -ENOMEM;
}
/*
 * Flags passed to step_into() and pick_link():
 *   WALK_TRAILING - a symlink found at this step is followed only if
 *                   LOOKUP_FOLLOW is set, and may_follow_link() is checked
 *                   before following it;
 *   WALK_NOFOLLOW - a symlink found at this step is never followed;
 *   WALK_MORE     - not examined by step_into()/pick_link() themselves.
 *                   NOTE(review): meaning is defined by the callers - confirm.
 */
enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};
/*
 * pick_link - push a symlink on the link stack and fetch its body
 * @nd:    walk state
 * @link:  the symlink; copied into the new stack entry
 * @inode: inode of the symlink
 * @seq:   ->d_seq sample for link->dentry, stored in the stack entry
 * @flags: WALK_* flags; WALK_TRAILING enables the may_follow_link() check
 *
 * Returns a pointer to the body still to be walked, NULL when there is
 * nothing to walk ("pure jump" - the stack entry has already been popped
 * with put_link()), or an ERR_PTR().  On the error returns after the push
 * the entry is left on the stack.
 */
static const char *pick_link(struct nameidata *nd, struct path *link,
struct inode *inode, unsigned seq, int flags)
{
struct saved *last;
const char *res;
int error = reserve_stack(nd, link, seq);
if (unlikely(error)) {
/* nothing was pushed; outside of RCU mode drop the references in *link here */
if (!(nd->flags & LOOKUP_RCU))
path_put(link);
return ERR_PTR(error);
}
/* push the new entry */
last = nd->stack + nd->depth++;
last->link = *link;
clear_delayed_call(&last->done);
last->seq = seq;
if (flags & WALK_TRAILING) {
error = may_follow_link(nd, inode);
if (unlikely(error))
return ERR_PTR(error);
}
if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS))
return ERR_PTR(-ELOOP);
/*
 * atime update: done directly outside of RCU mode; in RCU mode we leave
 * RCU mode first, and only when an update is actually needed.
 */
if (!(nd->flags & LOOKUP_RCU)) {
touch_atime(&last->link);
cond_resched();
} else if (atime_needs_update(&last->link, inode)) {
if (unlikely(unlazy_walk(nd)))
return ERR_PTR(-ECHILD);
touch_atime(&last->link);
}
error = security_inode_follow_link(link->dentry, inode,
nd->flags & LOOKUP_RCU);
if (unlikely(error))
return ERR_PTR(error);
/* use the body cached in ->i_link if there is one, else ask the fs */
res = READ_ONCE(inode->i_link);
if (!res) {
const char * (*get)(struct dentry *, struct inode *,
struct delayed_call *);
get = inode->i_op->get_link;
if (nd->flags & LOOKUP_RCU) {
/* RCU attempt passes a NULL dentry; -ECHILD means "retry outside of RCU mode" */
res = get(NULL, inode, &last->done);
if (res == ERR_PTR(-ECHILD)) {
if (unlikely(unlazy_walk(nd)))
return ERR_PTR(-ECHILD);
res = get(link->dentry, inode, &last->done);
}
} else {
res = get(link->dentry, inode, &last->done);
}
/* NULL from ->get_link(): no body to walk */
if (!res)
goto all_done;
if (IS_ERR(res))
return res;
}
/* absolute body: jump to root, then skip all leading slashes */
if (*res == '/') {
error = nd_jump_root(nd);
if (unlikely(error))
return ERR_PTR(error);
while (unlikely(*++res == '/'))
;
}
if (*res)
return res;
all_done: // pure jump
put_link(nd);
return NULL;
}
/*
 * Do we need to follow links? We _really_ want to be able
 * to do this check without having to look at inode->i_op,
 * so we keep a cache of "no, this doesn't need follow_link"
 * for the common case.
 *
 * step_into() moves the walk to @dentry: mounts are handled first via
 * handle_mounts(), which fills in a local path and may update @inode and
 * @seq.  If the result is not a symlink, or the symlink is not to be
 * followed, nd->path/nd->inode/nd->seq are switched to it and NULL is
 * returned.  Otherwise the symlink goes to pick_link() and its result is
 * returned.  Errors are returned as ERR_PTR().
 */
static const char *step_into(struct nameidata *nd, int flags,
struct dentry *dentry, struct inode *inode, unsigned seq)
{
struct path path;
int err = handle_mounts(nd, dentry, &path, &inode, &seq);
if (err < 0)
return ERR_PTR(err);
/*
 * Not followed when: it is not a symlink at all, or this is a trailing
 * component without LOOKUP_FOLLOW, or the caller said WALK_NOFOLLOW.
 */
if (likely(!d_is_symlink(path.dentry)) ||
((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) ||
(flags & WALK_NOFOLLOW)) {
/* not a symlink or should not follow */
if (!(nd->flags & LOOKUP_RCU)) {
/* drop the old location; the mount only if we are leaving it */
dput(nd->path.dentry);
if (nd->path.mnt != path.mnt)
mntput(nd->path.mnt);
}
nd->path = path;
nd->inode = inode;
nd->seq = seq;
return NULL;
}
if (nd->flags & LOOKUP_RCU) {
/* make sure that d_is_symlink above matches inode */
if (read_seqcount_retry(&path.dentry->d_seq, seq))
return ERR_PTR(-ECHILD);
} else {
/*
 * Same mount as nd->path: take an extra mount reference before the
 * path is handed to pick_link().
 */
if (path.mnt == nd->path.mnt)
mntget(path.mnt);
}
return pick_link(nd, &path, inode, seq, flags);
}
/*
 * follow_dotdot_rcu - find the parent for ".." in RCU mode
 * @nd:     walk state
 * @inodep: set to the parent's inode on success
 * @seqp:   set to the parent's ->d_seq sample on success
 *
 * Returns the parent dentry, NULL if we are at the walk's root (or at the
 * top of the mount tree) and should stay where we are, or ERR_PTR(-ECHILD)
 * on any failure.  When the current dentry is the root of its mount,
 * nd->path/nd->inode/nd->seq are first moved to the mountpoint found by
 * choose_mountpoint_rcu().
 */
static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
struct inode **inodep,
unsigned *seqp)
{
struct dentry *parent, *old;
if (path_equal(&nd->path, &nd->root))
goto in_root;
if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) {
/* at the root of a mount - step up to its mountpoint first */
struct path path;
unsigned seq;
if (!choose_mountpoint_rcu(real_mount(nd->path.mnt),
&nd->root, &path, &seq))
goto in_root;
/* crossing into another mount is refused under LOOKUP_NO_XDEV */
if (unlikely(nd->flags & LOOKUP_NO_XDEV))
return ERR_PTR(-ECHILD);
nd->path = path;
nd->inode = path.dentry->d_inode;
nd->seq = seq;
if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
return ERR_PTR(-ECHILD);
/* we know that mountpoint was pinned */
}
old = nd->path.dentry;
parent = old->d_parent;
*inodep = parent->d_inode;
*seqp = read_seqcount_begin(&parent->d_seq);
/* the child must not have changed since nd->seq was sampled */
if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
return ERR_PTR(-ECHILD);
if (unlikely(!path_connected(nd->path.mnt, parent)))
return ERR_PTR(-ECHILD);
return parent;
in_root:
if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
return ERR_PTR(-ECHILD);
/* ".." at the root is refused under LOOKUP_BENEATH */
if (unlikely(nd->flags & LOOKUP_BENEATH))
return ERR_PTR(-ECHILD);
return NULL;
}
/*
 * follow_dotdot - find the parent for ".." outside of RCU mode
 * @nd:     walk state
 * @inodep: set to the parent's inode on success
 * @seqp:   set to 0 on success
 *
 * Returns the parent dentry with a reference taken by dget_parent(), NULL
 * if we are at the walk's root (or at the top of the mount tree), or an
 * ERR_PTR(): -EXDEV for LOOKUP_NO_XDEV / LOOKUP_BENEATH violations, -ENOENT
 * if the parent fails the path_connected() check.
 *
 * In the NULL case an extra reference is taken on nd->path.dentry.
 * NOTE(review): presumably so that the caller can pass nd->path.dentry to
 * step_into(), which drops the old nd->path.dentry - confirm.
 */
static struct dentry *follow_dotdot(struct nameidata *nd,
struct inode **inodep,
unsigned *seqp)
{
struct dentry *parent;
if (path_equal(&nd->path, &nd->root))
goto in_root;
if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) {
/* at the root of a mount - step up to its mountpoint first */
struct path path;
if (!choose_mountpoint(real_mount(nd->path.mnt),
&nd->root, &path))
goto in_root;
path_put(&nd->path);
nd->path = path;
nd->inode = path.dentry->d_inode;
/* checked only after nd->path has been switched to the mountpoint */
if (unlikely(nd->flags & LOOKUP_NO_XDEV))
return ERR_PTR(-EXDEV);
}
/* rare case of legitimate dget_parent()... */
parent = dget_parent(nd->path.dentry);
if (unlikely(!path_connected(nd->path.mnt, parent))) {
dput(parent);
return ERR_PTR(-ENOENT);
}
*seqp = 0;
*inodep = parent->d_inode;
return parent;
in_root:
/* ".." at the root is refused under LOOKUP_BENEATH */
if (unlikely(nd->flags & LOOKUP_BENEATH))
return ERR_PTR(-EXDEV);
dget(nd->path.dentry);
return NULL;
}
static const char *handle_dots(struct nameidata *nd, int type)
{
if (type == LAST_DOTDOT) {
int error = 0;
const char *error = NULL;
struct dentry *parent;
struct inode *inode;
unsigned seq;
if (!nd->root.mnt) {
error = set_root(nd);
error = ERR_PTR(set_root(nd));
if (error)
return error;
}
if (nd->flags & LOOKUP_RCU)
error = follow_dotdot_rcu(nd);
parent = follow_dotdot_rcu(nd, &inode, &seq);
else
parent = follow_dotdot(nd, &inode, &seq);
if (IS_ERR(parent))
return ERR_CAST(parent);
if (unlikely(!parent))
error = step_into(nd, WALK_NOFOLLOW,
nd->path.dentry, nd->inode, nd->seq);
else
error = follow_dotdot(nd);
if (error)
error = step_into(nd, WALK_NOFOLLOW,
parent, inode, seq);
if (unlikely(error))
return error;
if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) {
......@@ -1814,119 +1816,40 @@ static inline int handle_dots(struct nameidata *nd, int type)
*/
smp_rmb();
if (unlikely(__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq)))
return -EAGAIN;
return ERR_PTR(-EAGAIN);
if (unlikely(__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq)))
return -EAGAIN;
}
}
return 0;
}
/*
 * pick_link - record a symlink in the next slot of nd->stack.
 *
 * Increments nd->total_link_count and fails with -ELOOP if the count had
 * already reached MAXSYMLINKS.  Outside of RCU mode, an extra reference is
 * taken on link->mnt when it is the same mount as nd->path.mnt.  A stack
 * slot is then requested through nd_alloc_stack(); when that fails with
 * -ECHILD, the function tries to leave RCU mode and retries the allocation.
 *
 * Returns 1 once the link has been stored in nd->stack, or a negative
 * errno.  On the allocation-failure path the link is released with
 * path_put() before returning.
 */
static int pick_link(struct nameidata *nd, struct path *link,
struct inode *inode, unsigned seq)
{
int error;
struct saved *last;
/* the counter is bumped even when the limit check fails */
if (unlikely(nd->total_link_count++ >= MAXSYMLINKS)) {
path_to_nameidata(link, nd);
return -ELOOP;
}
if (!(nd->flags & LOOKUP_RCU)) {
if (link->mnt == nd->path.mnt)
mntget(link->mnt);
}
error = nd_alloc_stack(nd);
if (unlikely(error)) {
if (error == -ECHILD) {
if (unlikely(!legitimize_path(nd, link, seq))) {
/*
 * Could not legitimize the link: drop the saved links, clear
 * LOOKUP_RCU and nd->path, and leave the RCU read section.
 * error stays -ECHILD and is returned below.
 */
drop_links(nd);
nd->depth = 0;
nd->flags &= ~LOOKUP_RCU;
nd->path.mnt = NULL;
nd->path.dentry = NULL;
rcu_read_unlock();
} else if (likely(unlazy_walk(nd)) == 0)
/* unlazy_walk() returned 0: retry the allocation */
error = nd_alloc_stack(nd);
}
if (error) {
path_put(link);
return error;
/*
 * NOTE(review): unreachable after the return above, and it returns a
 * pointer from a function declared int - looks like a stray line; confirm.
 */
return ERR_PTR(-EAGAIN);
}
}
/* claim the next stack slot and fill it in */
last = nd->stack + nd->depth++;
last->link = *link;
clear_delayed_call(&last->done);
nd->link_inode = inode;
last->seq = seq;
return 1;
}
/*
 * Flags for step_into():
 *   WALK_FOLLOW - follow a symlink even if LOOKUP_FOLLOW is not set in
 *                 nd->flags;
 *   WALK_MORE   - do not call put_link() on entry.
 */
enum {WALK_FOLLOW = 1, WALK_MORE = 2};
/*
* Do we need to follow links? We _really_ want to be able
* to do this check without having to look at inode->i_op,
* so we keep a cache of "no, this doesn't need follow_link"
* for the common case.
*
* Returns 0 after moving nd to @path (not a symlink, or one we should not
* follow), -ECHILD if the RCU-mode seqcount check on the dentry fails, and
* otherwise whatever pick_link() returns.
*/
static inline int step_into(struct nameidata *nd, struct path *path,
int flags, struct inode *inode, unsigned seq)
{
/* unless the caller passed WALK_MORE, drop the link we are inside of */
if (!(flags & WALK_MORE) && nd->depth)
put_link(nd);
if (likely(!d_is_symlink(path->dentry)) ||
!(flags & WALK_FOLLOW || nd->flags & LOOKUP_FOLLOW)) {
/* not a symlink or should not follow */
path_to_nameidata(path, nd);
nd->inode = inode;
nd->seq = seq;
return 0;
}
/* make sure that d_is_symlink above matches inode */
if (nd->flags & LOOKUP_RCU) {
if (read_seqcount_retry(&path->dentry->d_seq, seq))
return -ECHILD;
}
return pick_link(nd, path, inode, seq);
/*
 * NOTE(review): unreachable after the return above, in a function declared
 * int - looks like a stray line; confirm.
 */
return NULL;
}
static int walk_component(struct nameidata *nd, int flags)
static const char *walk_component(struct nameidata *nd, int flags)
{
struct path path;
struct dentry *dentry;
struct inode *inode;
unsigned seq;
int err;
/*
* "." and ".." are special - ".." especially so because it has
* to be able to know about the current root directory and
* parent relationships.
*/
if (unlikely(nd->last_type != LAST_NORM)) {
err = handle_dots(nd, nd->last_type);
if (!(flags & WALK_MORE) && nd->depth)
put_link(nd);
return err;
return handle_dots(nd, nd->last_type);
}
err = lookup_fast(nd, &path, &inode, &seq);
if (unlikely(err <= 0)) {
if (err < 0)
return err;
path.dentry = lookup_slow(&nd->last, nd->path.dentry,
nd->flags);
if (IS_ERR(path.dentry))
return PTR_ERR(path.dentry);
path.mnt = nd->path.mnt;
err = follow_managed(&path, nd);
if (unlikely(err < 0))
return err;
seq = 0; /* we are already out of RCU mode */
inode = d_backing_inode(path.dentry);
dentry = lookup_fast(nd, &inode, &seq);
if (IS_ERR(dentry))
return ERR_CAST(dentry);
if (unlikely(!dentry)) {
dentry = lookup_slow(&nd->last, nd->path.dentry, nd->flags);
if (IS_ERR(dentry))
return ERR_CAST(dentry);
}
return step_into(nd, &path, flags, inode, seq);
if (!(flags & WALK_MORE) && nd->depth)
put_link(nd);
return step_into(nd, flags, dentry, inode, seq);
}
/*
......@@ -2167,8 +2090,11 @@ static inline u64 hash_name(const void *salt, const char *name)
*/
static int link_path_walk(const char *name, struct nameidata *nd)
{
int depth = 0; // depth <= nd->depth
int err;
nd->last_type = LAST_ROOT;
nd->flags |= LOOKUP_PARENT;
if (IS_ERR(name))
return PTR_ERR(name);
while (*name=='/')
......@@ -2178,6 +2104,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
/* At this point we know we have a real path component. */
for(;;) {
const char *link;
u64 hash_len;
int type;
......@@ -2227,36 +2154,27 @@ static int link_path_walk(const char *name, struct nameidata *nd)
} while (unlikely(*name == '/'));
if (unlikely(!*name)) {
OK:
/* pathname body, done */
if (!nd->depth)
return 0;
name = nd->stack[nd->depth - 1].name;
/* trailing symlink, done */
if (!name)
/* pathname or trailing symlink, done */
if (!depth) {
nd->dir_uid = nd->inode->i_uid;
nd->dir_mode = nd->inode->i_mode;
nd->flags &= ~LOOKUP_PARENT;
return 0;
}
/* last component of nested symlink */
err = walk_component(nd, WALK_FOLLOW);
name = nd->stack[--depth].name;
link = walk_component(nd, 0);
} else {
/* not the last component */
err = walk_component(nd, WALK_FOLLOW | WALK_MORE);
link = walk_component(nd, WALK_MORE);
}
if (err < 0)
return err;
if (err) {
const char *s = get_link(nd);
if (IS_ERR(s))
return PTR_ERR(s);
err = 0;
if (unlikely(!s)) {
/* jumped */
put_link(nd);
} else {
nd->stack[nd->depth - 1].name = name;
name = s;
continue;
}
if (unlikely(link)) {
if (IS_ERR(link))
return PTR_ERR(link);
/* a symlink to follow */
nd->stack[depth++].name = name;
name = link;
continue;
}
if (unlikely(!d_can_lookup(nd->path.dentry))) {
if (nd->flags & LOOKUP_RCU) {
......@@ -2279,8 +2197,7 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
if (flags & LOOKUP_RCU)
rcu_read_lock();
nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT;
nd->flags = flags | LOOKUP_JUMPED;
nd->depth = 0;
nd->m_seq = __read_seqcount_begin(&mount_lock.seqcount);
......@@ -2370,54 +2287,20 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
return s;
}
/*
 * trailing_symlink - fetch the body of a symlink found as the last component.
 *
 * Fails with the ERR_PTR() of whatever may_follow_link() reports.  Otherwise
 * sets LOOKUP_PARENT, clears nd->stack[0].name and asks get_link() for the
 * link body.  A NULL result from get_link() is turned into an empty string,
 * so the caller gets a string or an ERR_PTR() (if get_link() returned one)
 * and never NULL.
 */
static const char *trailing_symlink(struct nameidata *nd)
{
	const char *body;
	int err;

	err = may_follow_link(nd);
	if (unlikely(err))
		return ERR_PTR(err);

	nd->flags |= LOOKUP_PARENT;
	nd->stack[0].name = NULL;

	body = get_link(nd);
	if (!body)
		return "";
	return body;
}
static inline int lookup_last(struct nameidata *nd)
static inline const char *lookup_last(struct nameidata *nd)
{
if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
nd->flags &= ~LOOKUP_PARENT;
return walk_component(nd, 0);
return walk_component(nd, WALK_TRAILING);
}
static int handle_lookup_down(struct nameidata *nd)
{
struct path path = nd->path;
struct inode *inode = nd->inode;
unsigned seq = nd->seq;
int err;
if (nd->flags & LOOKUP_RCU) {
/*
* don't bother with unlazy_walk on failure - we are
* at the very beginning of walk, so we lose nothing
* if we simply redo everything in non-RCU mode
*/
if (unlikely(!__follow_mount_rcu(nd, &path, &inode, &seq)))
return -ECHILD;
} else {
dget(path.dentry);
err = follow_managed(&path, nd);
if (unlikely(err < 0))
return err;
inode = d_backing_inode(path.dentry);
seq = 0;
}
path_to_nameidata(&path, nd);
nd->inode = inode;
nd->seq = seq;
return 0;
if (!(nd->flags & LOOKUP_RCU))
dget(nd->path.dentry);
return PTR_ERR(step_into(nd, WALK_NOFOLLOW,
nd->path.dentry, nd->inode, nd->seq));
}
/* Returns 0 and nd will be valid on success; returns an error otherwise. */
......@@ -2432,16 +2315,19 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
s = ERR_PTR(err);
}
while (!(err = link_path_walk(s, nd))
&& ((err = lookup_last(nd)) > 0)) {
s = trailing_symlink(nd);
}
while (!(err = link_path_walk(s, nd)) &&
(s = lookup_last(nd)) != NULL)
;
if (!err)
err = complete_walk(nd);
if (!err && nd->flags & LOOKUP_DIRECTORY)
if (!d_can_lookup(nd->path.dentry))
err = -ENOTDIR;
if (!err && unlikely(nd->flags & LOOKUP_MOUNTPOINT)) {
err = handle_lookup_down(nd);
nd->flags &= ~LOOKUP_JUMPED; // no d_weak_revalidate(), please...
}
if (!err) {
*path = nd->path;
nd->path.mnt = NULL;
......@@ -2470,7 +2356,8 @@ int filename_lookup(int dfd, struct filename *name, unsigned flags,
retval = path_lookupat(&nd, flags | LOOKUP_REVAL, path);
if (likely(!retval))
audit_inode(name, path->dentry, 0);
audit_inode(name, path->dentry,
flags & LOOKUP_MOUNTPOINT ? AUDIT_INODE_NOEVAL : 0);
restore_nameidata();
putname(name);
return retval;
......@@ -2718,24 +2605,23 @@ int path_pts(struct path *path)
/* Find something mounted on "pts" in the same directory as
* the input path.
*/
struct dentry *child, *parent;
struct qstr this;
int ret;
ret = path_parent_directory(path);
if (ret)
return ret;
struct dentry *parent = dget_parent(path->dentry);
struct dentry *child;
struct qstr this = QSTR_INIT("pts", 3);
parent = path->dentry;
this.name = "pts";
this.len = 3;
if (unlikely(!path_connected(path->mnt, parent))) {
dput(parent);
return -ENOENT;
}
dput(path->dentry);
path->dentry = parent;
child = d_hash_and_lookup(parent, &this);
if (!child)
return -ENOENT;
path->dentry = child;
dput(parent);
follow_mount(path);
follow_down(path);
return 0;
}
#endif
......@@ -2748,88 +2634,6 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
}
EXPORT_SYMBOL(user_path_at_empty);
/**
 * path_mountpoint - look up a path to be umounted
 * @nd:		lookup context
 * @flags:	lookup flags, handed to path_init()
 * @path:	pointer to container for result
 *
 * Look up the given name, but don't attempt to revalidate the last component.
 * The walk alternates link_path_walk() and lookup_last(), feeding each
 * trailing symlink body back in, then leaves RCU mode if it is still in it
 * and finishes with handle_lookup_down().
 *
 * Returns 0 with nd->path moved into @path on success; returns an error
 * otherwise.  terminate_walk() is called on @nd in both cases.
 */
static int
path_mountpoint(struct nameidata *nd, unsigned flags, struct path *path)
{
	const char *s = path_init(nd, flags);
	int err;

	for (;;) {
		err = link_path_walk(s, nd);
		if (err)
			break;
		err = lookup_last(nd);
		if (err <= 0)
			break;
		/* positive result: a trailing symlink to walk next */
		s = trailing_symlink(nd);
	}

	if (!err) {
		if (nd->flags & LOOKUP_RCU)
			err = unlazy_walk(nd);
		if (!err)
			err = handle_lookup_down(nd);
	}

	if (!err) {
		/* hand the result over and detach it from nd */
		*path = nd->path;
		nd->path.mnt = NULL;
		nd->path.dentry = NULL;
	}

	terminate_walk(nd);
	return err;
}
static int
filename_mountpoint(int dfd, struct filename *name, struct path *path,
unsigned int flags)
{
struct nameidata nd;
int error;
if (IS_ERR(name))
return PTR_ERR(name);
set_nameidata(&nd, dfd, name);
error = path_mountpoint(&nd, flags | LOOKUP_RCU, path);
if (unlikely(error == -ECHILD))
error = path_mountpoint(&nd, flags, path);
if (unlikely(error == -ESTALE))
error = path_mountpoint(&nd, flags | LOOKUP_REVAL, path);
if (likely(!error))
audit_inode(name, path->dentry, AUDIT_INODE_NOEVAL);
restore_nameidata();
putname(name);
return error;
}
/**
* user_path_mountpoint_at - lookup a path from userland in order to umount it
* @dfd: directory file descriptor
* @name: pathname from userland
* @flags: lookup flags
* @path: pointer to container to hold result
*
* A umount is a special case for path walking. We're not actually interested
* in the inode in this situation, and ESTALE errors can be a problem. We
* simply want to track down the dentry and vfsmount attached at the mountpoint
* and avoid revalidating the last component.
*
* The name is passed through getname() and the lookup itself is done by
* filename_mountpoint().
*
* Returns 0 and populates "path" on success; otherwise returns the error
* from filename_mountpoint().
*/
int
user_path_mountpoint_at(int dfd, const char __user *name, unsigned int flags,
struct path *path)
{
return filename_mountpoint(dfd, getname(name), path, flags);
}
/*
 * kern_path_mountpoint - mountpoint lookup for a name passed through
 * getname_kernel().
 *
 * Thin wrapper: converts @name with getname_kernel() and hands everything to
 * filename_mountpoint(), whose return value is passed back unchanged.
 * Exported via EXPORT_SYMBOL().
 */
int
kern_path_mountpoint(int dfd, const char *name, struct path *path,
unsigned int flags)
{
return filename_mountpoint(dfd, getname_kernel(name), path, flags);
}
EXPORT_SYMBOL(kern_path_mountpoint);
int __check_sticky(struct inode *dir, struct inode *inode)
{
kuid_t fsuid = current_fsuid();
......@@ -3127,18 +2931,14 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m
*
* Returns an error code otherwise.
*/
static int atomic_open(struct nameidata *nd, struct dentry *dentry,
struct path *path, struct file *file,
const struct open_flags *op,
int open_flag, umode_t mode)
static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry,
struct file *file,
int open_flag, umode_t mode)
{
struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
struct inode *dir = nd->path.dentry->d_inode;
int error;
if (!(~open_flag & (O_EXCL | O_CREAT))) /* both O_EXCL and O_CREAT */
open_flag &= ~O_TRUNC;
if (nd->flags & LOOKUP_DIRECTORY)
open_flag |= O_DIRECTORY;
......@@ -3149,19 +2949,10 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
d_lookup_done(dentry);
if (!error) {
if (file->f_mode & FMODE_OPENED) {
/*
* We didn't have the inode before the open, so check open
* permission here.
*/
int acc_mode = op->acc_mode;
if (file->f_mode & FMODE_CREATED) {
WARN_ON(!(open_flag & O_CREAT));
fsnotify_create(dir, dentry);
acc_mode = 0;
if (unlikely(dentry != file->f_path.dentry)) {
dput(dentry);
dentry = dget(file->f_path.dentry);
}
error = may_open(&file->f_path, acc_mode, open_flag);
if (WARN_ON(error > 0))
error = -EINVAL;
} else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
error = -EIO;
} else {
......@@ -3169,19 +2960,15 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
dput(dentry);
dentry = file->f_path.dentry;
}
if (file->f_mode & FMODE_CREATED)
fsnotify_create(dir, dentry);
if (unlikely(d_is_negative(dentry))) {
if (unlikely(d_is_negative(dentry)))
error = -ENOENT;
} else {
path->dentry = dentry;
path->mnt = nd->path.mnt;
return 0;
}
}
}
dput(dentry);
return error;
if (error) {
dput(dentry);
dentry = ERR_PTR(error);
}
return dentry;
}
/*
......@@ -3199,10 +2986,9 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
*
* An error code is returned on failure.
*/
static int lookup_open(struct nameidata *nd, struct path *path,
struct file *file,
const struct open_flags *op,
bool got_write)
static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
const struct open_flags *op,
bool got_write)
{
struct dentry *dir = nd->path.dentry;
struct inode *dir_inode = dir->d_inode;
......@@ -3213,7 +2999,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
if (unlikely(IS_DEADDIR(dir_inode)))
return -ENOENT;
return ERR_PTR(-ENOENT);
file->f_mode &= ~FMODE_CREATED;
dentry = d_lookup(dir, &nd->last);
......@@ -3221,7 +3007,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
if (!dentry) {
dentry = d_alloc_parallel(dir, &nd->last, &wq);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
return dentry;
}
if (d_in_lookup(dentry))
break;
......@@ -3237,7 +3023,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
}
if (dentry->d_inode) {
/* Cached positive dentry: will open in f_op->open */
goto out_no_open;
return dentry;
}
/*
......@@ -3249,41 +3035,27 @@ static int lookup_open(struct nameidata *nd, struct path *path,
* Another problem is returning the "right" error value (e.g. for an
* O_EXCL open we want to return EEXIST not EROFS).
*/
if (unlikely(!got_write))
open_flag &= ~O_TRUNC;
if (open_flag & O_CREAT) {
if (open_flag & O_EXCL)
open_flag &= ~O_TRUNC;
if (!IS_POSIXACL(dir->d_inode))
mode &= ~current_umask();
if (unlikely(!got_write)) {
create_error = -EROFS;
open_flag &= ~O_CREAT;
if (open_flag & (O_EXCL | O_TRUNC))
goto no_open;
/* No side effects, safe to clear O_CREAT */
} else {
if (likely(got_write))
create_error = may_o_create(&nd->path, dentry, mode);
if (create_error) {
open_flag &= ~O_CREAT;
if (open_flag & O_EXCL)
goto no_open;
}
}
} else if ((open_flag & (O_TRUNC|O_WRONLY|O_RDWR)) &&
unlikely(!got_write)) {
/*
* No O_CREATE -> atomicity not a requirement -> fall
* back to lookup + open
*/
goto no_open;
else
create_error = -EROFS;
}
if (create_error)
open_flag &= ~O_CREAT;
if (dir_inode->i_op->atomic_open) {
error = atomic_open(nd, dentry, path, file, op, open_flag,
mode);
if (unlikely(error == -ENOENT) && create_error)
error = create_error;
return error;
dentry = atomic_open(nd, dentry, file, open_flag, mode);
if (unlikely(create_error) && dentry == ERR_PTR(-ENOENT))
dentry = ERR_PTR(create_error);
return dentry;
}
no_open:
if (d_in_lookup(dentry)) {
struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry,
nd->flags);
......@@ -3310,78 +3082,60 @@ static int lookup_open(struct nameidata *nd, struct path *path,
open_flag & O_EXCL);
if (error)
goto out_dput;
fsnotify_create(dir_inode, dentry);
}
if (unlikely(create_error) && !dentry->d_inode) {
error = create_error;
goto out_dput;
}
out_no_open:
path->dentry = dentry;
path->mnt = nd->path.mnt;
return 0;
return dentry;
out_dput:
dput(dentry);
return error;
return ERR_PTR(error);
}
/*
* Handle the last step of open()
*/
static int do_last(struct nameidata *nd,
static const char *open_last_lookups(struct nameidata *nd,
struct file *file, const struct open_flags *op)
{
struct dentry *dir = nd->path.dentry;
kuid_t dir_uid = nd->inode->i_uid;
umode_t dir_mode = nd->inode->i_mode;
int open_flag = op->open_flag;
bool will_truncate = (open_flag & O_TRUNC) != 0;
bool got_write = false;
int acc_mode = op->acc_mode;
unsigned seq;
struct inode *inode;
struct path path;
struct dentry *dentry;
const char *res;
int error;
nd->flags &= ~LOOKUP_PARENT;
nd->flags |= op->intent;
if (nd->last_type != LAST_NORM) {
error = handle_dots(nd, nd->last_type);
if (unlikely(error))
return error;
goto finish_open;
if (nd->depth)
put_link(nd);
return handle_dots(nd, nd->last_type);
}
if (!(open_flag & O_CREAT)) {
if (nd->last.name[nd->last.len])
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
/* we _can_ be in RCU mode here */
error = lookup_fast(nd, &path, &inode, &seq);
if (likely(error > 0))
dentry = lookup_fast(nd, &inode, &seq);
if (IS_ERR(dentry))
return ERR_CAST(dentry);
if (likely(dentry))
goto finish_lookup;
if (error < 0)
return error;
BUG_ON(nd->inode != dir->d_inode);
BUG_ON(nd->flags & LOOKUP_RCU);
} else {
/* create side of things */
/*
* This will *only* deal with leaving RCU mode - LOOKUP_JUMPED
* has been cleared when we got to the last component we are
* about to look up
*/
error = complete_walk(nd);
if (error)
return error;
if (nd->flags & LOOKUP_RCU) {
error = unlazy_walk(nd);
if (unlikely(error))
return ERR_PTR(error);
}
audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
/* trailing slashes? */
if (unlikely(nd->last.name[nd->last.len]))
return -EISDIR;
return ERR_PTR(-EISDIR);
}
if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
......@@ -3398,108 +3152,90 @@ static int do_last(struct nameidata *nd,
inode_lock(dir->d_inode);
else
inode_lock_shared(dir->d_inode);
error = lookup_open(nd, &path, file, op, got_write);
dentry = lookup_open(nd, file, op, got_write);
if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED))
fsnotify_create(dir->d_inode, dentry);
if (open_flag & O_CREAT)
inode_unlock(dir->d_inode);
else
inode_unlock_shared(dir->d_inode);
if (error)
goto out;
if (file->f_mode & FMODE_OPENED) {
if ((file->f_mode & FMODE_CREATED) ||
!S_ISREG(file_inode(file)->i_mode))
will_truncate = false;
audit_inode(nd->name, file->f_path.dentry, 0);
goto opened;
}
if (got_write)
mnt_drop_write(nd->path.mnt);
if (file->f_mode & FMODE_CREATED) {
/* Don't check for write permission, don't truncate */
open_flag &= ~O_TRUNC;
will_truncate = false;
acc_mode = 0;
path_to_nameidata(&path, nd);
goto finish_open_created;
}
if (IS_ERR(dentry))
return ERR_CAST(dentry);
/*
* If atomic_open() acquired write access it is dropped now due to
* possible mount and symlink following (this might be optimized away if
* necessary...)
*/
if (got_write) {
mnt_drop_write(nd->path.mnt);
got_write = false;
if (file->f_mode & (FMODE_OPENED | FMODE_CREATED)) {
dput(nd->path.dentry);
nd->path.dentry = dentry;
return NULL;
}
error = follow_managed(&path, nd);
if (unlikely(error < 0))
return error;
finish_lookup:
if (nd->depth)
put_link(nd);
res = step_into(nd, WALK_TRAILING, dentry, inode, seq);
if (unlikely(res))
nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
return res;
}
/*
* create/update audit record if it already exists.
*/
audit_inode(nd->name, path.dentry, 0);
/*
* Handle the last step of open()
*/
static int do_open(struct nameidata *nd,
struct file *file, const struct open_flags *op)
{
int open_flag = op->open_flag;
bool do_truncate;
int acc_mode;
int error;
if (unlikely((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))) {
path_to_nameidata(&path, nd);
return -EEXIST;
if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
error = complete_walk(nd);
if (error)
return error;
}
seq = 0; /* out of RCU mode, so the value doesn't matter */
inode = d_backing_inode(path.dentry);
finish_lookup:
error = step_into(nd, &path, 0, inode, seq);
if (unlikely(error))
return error;
finish_open:
/* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
error = complete_walk(nd);
if (error)
return error;
audit_inode(nd->name, nd->path.dentry, 0);
if (!(file->f_mode & FMODE_CREATED))
audit_inode(nd->name, nd->path.dentry, 0);
if (open_flag & O_CREAT) {
error = -EISDIR;
if ((open_flag & O_EXCL) && !(file->f_mode & FMODE_CREATED))
return -EEXIST;
if (d_is_dir(nd->path.dentry))
goto out;
error = may_create_in_sticky(dir_mode, dir_uid,
return -EISDIR;
error = may_create_in_sticky(nd->dir_mode, nd->dir_uid,
d_backing_inode(nd->path.dentry));
if (unlikely(error))
goto out;
return error;
}
error = -ENOTDIR;
if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
goto out;
if (!d_is_reg(nd->path.dentry))
will_truncate = false;
return -ENOTDIR;
if (will_truncate) {
do_truncate = false;
acc_mode = op->acc_mode;
if (file->f_mode & FMODE_CREATED) {
/* Don't check for write permission, don't truncate */
open_flag &= ~O_TRUNC;
acc_mode = 0;
} else if (d_is_reg(nd->path.dentry) && open_flag & O_TRUNC) {
error = mnt_want_write(nd->path.mnt);
if (error)
goto out;
got_write = true;
return error;
do_truncate = true;
}
finish_open_created:
error = may_open(&nd->path, acc_mode, open_flag);
if (error)
goto out;
BUG_ON(file->f_mode & FMODE_OPENED); /* once it's opened, it's opened */
error = vfs_open(&nd->path, file);
if (error)
goto out;
opened:
error = ima_file_check(file, op->acc_mode);
if (!error && will_truncate)
if (!error && !(file->f_mode & FMODE_OPENED))
error = vfs_open(&nd->path, file);
if (!error)
error = ima_file_check(file, op->acc_mode);
if (!error && do_truncate)
error = handle_truncate(file);
out:
if (unlikely(error > 0)) {
WARN_ON(1);
error = -EINVAL;
}
if (got_write)
if (do_truncate)
mnt_drop_write(nd->path.mnt);
return error;
}
......@@ -3604,10 +3340,10 @@ static struct file *path_openat(struct nameidata *nd,
} else {
const char *s = path_init(nd, flags);
while (!(error = link_path_walk(s, nd)) &&
(error = do_last(nd, file, op)) > 0) {
nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
s = trailing_symlink(nd);
}
(s = open_last_lookups(nd, file, op)) != NULL)
;
if (!error)
error = do_open(nd, file, op);
terminate_walk(nd);
}
if (likely(!error)) {
......
......@@ -1669,7 +1669,7 @@ int ksys_umount(char __user *name, int flags)
struct path path;
struct mount *mnt;
int retval;
int lookup_flags = 0;
int lookup_flags = LOOKUP_MOUNTPOINT;
if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
return -EINVAL;
......@@ -1680,7 +1680,7 @@ int ksys_umount(char __user *name, int flags)
if (!(flags & UMOUNT_NOFOLLOW))
lookup_flags |= LOOKUP_FOLLOW;
retval = user_path_mountpoint_at(AT_FDCWD, name, lookup_flags, &path);
retval = user_path_at(AT_FDCWD, name, lookup_flags, &path);
if (retval)
goto out;
mnt = real_mount(path.mnt);
......@@ -2697,45 +2697,32 @@ static int do_move_mount_old(struct path *path, const char *old_name)
/*
* add a mount into a namespace's mount tree
*/
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
static int do_add_mount(struct mount *newmnt, struct mountpoint *mp,
struct path *path, int mnt_flags)
{
struct mountpoint *mp;
struct mount *parent;
int err;
struct mount *parent = real_mount(path->mnt);
mnt_flags &= ~MNT_INTERNAL_FLAGS;
mp = lock_mount(path);
if (IS_ERR(mp))
return PTR_ERR(mp);
parent = real_mount(path->mnt);
err = -EINVAL;
if (unlikely(!check_mnt(parent))) {
/* that's acceptable only for automounts done in private ns */
if (!(mnt_flags & MNT_SHRINKABLE))
goto unlock;
return -EINVAL;
/* ... and for those we'd better have mountpoint still alive */
if (!parent->mnt_ns)
goto unlock;
return -EINVAL;
}
/* Refuse the same filesystem on the same mount point */
err = -EBUSY;
if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb &&
path->mnt->mnt_root == path->dentry)
goto unlock;
return -EBUSY;
err = -EINVAL;
if (d_is_symlink(newmnt->mnt.mnt_root))
goto unlock;
return -EINVAL;
newmnt->mnt.mnt_flags = mnt_flags;
err = graft_tree(newmnt, parent, mp);
unlock:
unlock_mount(mp);
return err;
return graft_tree(newmnt, parent, mp);
}
static bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
......@@ -2748,6 +2735,7 @@ static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
unsigned int mnt_flags)
{
struct vfsmount *mnt;
struct mountpoint *mp;
struct super_block *sb = fc->root->d_sb;
int error;
......@@ -2768,7 +2756,13 @@ static int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
mnt_warn_timestamp_expiry(mountpoint, mnt);
error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags);
mp = lock_mount(mountpoint);
if (IS_ERR(mp)) {
mntput(mnt);
return PTR_ERR(mp);
}
error = do_add_mount(real_mount(mnt), mp, mountpoint, mnt_flags);
unlock_mount(mp);
if (error < 0)
mntput(mnt);
return error;
......@@ -2829,23 +2823,63 @@ static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
int finish_automount(struct vfsmount *m, struct path *path)
{
struct mount *mnt = real_mount(m);
struct dentry *dentry = path->dentry;
struct mountpoint *mp;
struct mount *mnt;
int err;
if (!m)
return 0;
if (IS_ERR(m))
return PTR_ERR(m);
mnt = real_mount(m);
/* The new mount record should have at least 2 refs to prevent it being
* expired before we get a chance to add it
*/
BUG_ON(mnt_get_count(mnt) < 2);
if (m->mnt_sb == path->mnt->mnt_sb &&
m->mnt_root == path->dentry) {
m->mnt_root == dentry) {
err = -ELOOP;
goto fail;
goto discard;
}
err = do_add_mount(mnt, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
if (!err)
return 0;
fail:
/*
* we don't want to use lock_mount() - in this case finding something
* that overmounts our mountpoint means "quietly drop what we've
* got", not "try to mount it on top".
*/
inode_lock(dentry->d_inode);
namespace_lock();
if (unlikely(cant_mount(dentry))) {
err = -ENOENT;
goto discard_locked;
}
rcu_read_lock();
if (unlikely(__lookup_mnt(path->mnt, dentry))) {
rcu_read_unlock();
err = 0;
goto discard_locked;
}
rcu_read_unlock();
mp = get_mountpoint(dentry);
if (IS_ERR(mp)) {
err = PTR_ERR(mp);
goto discard_locked;
}
err = do_add_mount(mnt, mp, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
unlock_mount(mp);
if (unlikely(err))
goto discard;
mntput(m);
return 0;
discard_locked:
namespace_unlock();
inode_unlock(dentry->d_inode);
discard:
/* remove m from any expiration list it may be on */
if (!list_empty(&mnt->mnt_expire)) {
namespace_lock();
......
......@@ -1046,8 +1046,10 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
if (flags & O_CREAT) {
op->intent |= LOOKUP_CREATE;
if (flags & O_EXCL)
if (flags & O_EXCL) {
op->intent |= LOOKUP_EXCL;
flags |= O_NOFOLLOW;
}
}
if (flags & O_DIRECTORY)
......
......@@ -15,7 +15,7 @@ enum { MAX_NESTED_LINKS = 8 };
/*
* Type of the last component on LOOKUP_PARENT
*/
enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT};
/* pathwalk mode */
#define LOOKUP_FOLLOW 0x0001 /* follow links at the end */
......@@ -23,6 +23,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND};
#define LOOKUP_AUTOMOUNT 0x0004 /* force terminal automount */
#define LOOKUP_EMPTY 0x4000 /* accept empty path [user_... only] */
#define LOOKUP_DOWN 0x8000 /* follow mounts in the starting point */
#define LOOKUP_MOUNTPOINT 0x0080 /* follow mounts in the end */
#define LOOKUP_REVAL 0x0020 /* tell ->d_revalidate() to trust no cache */
#define LOOKUP_RCU 0x0040 /* RCU pathwalk mode; semi-internal */
......@@ -64,7 +65,6 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne
extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int);
extern void done_path_create(struct path *, struct dentry *);
extern struct dentry *kern_path_locked(const char *, struct path *);
extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int);
extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int);
extern struct dentry *lookup_one_len(const char *, struct dentry *, int);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment