Commit d9395512 authored by Linus Torvalds

Merge tag 'pull-work.namei' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs namei updates from Al Viro:
 "RCU pathwalk cleanups.

  Storing sampled ->d_seq of the next dentry in nameidata simplifies
  life considerably, especially if we delay fetching ->d_inode until
  step_into()"

* tag 'pull-work.namei' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  step_into(): move fetching ->d_inode past handle_mounts()
  lookup_fast(): don't bother with inode
  follow_dotdot{,_rcu}(): don't bother with inode
  step_into(): lose inode argument
  namei: stash the sampled ->d_seq into nameidata
  namei: move clearing LOOKUP_RCU towards rcu_read_unlock()
  switch try_to_unlazy_next() to __legitimize_mnt()
  follow_dotdot{,_rcu}(): change calling conventions
  namei: get rid of pointless unlikely(read_seqcount_retry(...))
  __follow_mount_rcu(): verify that mount_lock remains unchanged
parents f0065400 3bd8bc89
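The theme of the series is visible in the hunks below: the ->d_seq value sampled for the next dentry now lives in nameidata (nd->next_seq) instead of being threaded through step_into()/handle_mounts()/pick_link() as an explicit argument, and ->d_inode is only fetched inside step_into(), after handle_mounts(), then used only if the stashed sample still matches. A minimal user-space sketch of that sample-then-validate shape, assuming nothing beyond what the diff shows; all names (walk_ctx, dent, step_onto, ...) are illustrative stand-ins, not the kernel's types:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct dent {
	atomic_uint d_seq;     /* models dentry->d_seq */
	const char *d_inode;   /* models dentry->d_inode */
};

struct walk_ctx {
	unsigned seq;          /* sample for the dentry we stand on (nd->seq) */
	unsigned next_seq;     /* sample for the next dentry (nd->next_seq) */
};

static unsigned seq_sample(struct dent *d)
{
	return atomic_load_explicit(&d->d_seq, memory_order_acquire);
}

static bool seq_changed(struct dent *d, unsigned sampled)
{
	return atomic_load_explicit(&d->d_seq, memory_order_acquire) != sampled;
}

/* lookup step: find the child and stash its seq in the walk context,
 * the way __d_lookup_rcu() now writes straight into nd->next_seq */
static struct dent *lookup_step(struct walk_ctx *ctx, struct dent *child)
{
	ctx->next_seq = seq_sample(child);
	return child;
}

/* step_into() analogue: the inode is read only here, after the mount
 * traversal step would have run, and trusted only if the stashed sample
 * is still the dentry's current seq */
static const char *step_onto(struct walk_ctx *ctx, struct dent *child)
{
	const char *inode = child->d_inode;   /* delayed ->d_inode fetch */

	if (seq_changed(child, ctx->next_seq))
		return NULL;                  /* raced: retry in ref-walk mode */
	ctx->seq = ctx->next_seq;             /* the child is now our position */
	return inode;
}

int main(void)
{
	struct dent child = { .d_inode = "inode of child" };
	struct walk_ctx ctx = { 0 };

	atomic_init(&child.d_seq, 2);
	const char *inode = step_onto(&ctx, lookup_step(&ctx, &child));
	printf("%s\n", inode ? inode : "-ECHILD: fall back to ref-walk");
	return 0;
}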
......@@ -100,7 +100,6 @@ static inline int is_mounted(struct vfsmount *mnt)
extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
extern int __legitimize_mnt(struct vfsmount *, unsigned);
extern bool legitimize_mnt(struct vfsmount *, unsigned);
static inline bool __path_is_mountpoint(const struct path *path)
{
......
......@@ -567,7 +567,7 @@ struct nameidata {
struct path root;
struct inode *inode; /* path.dentry.d_inode */
unsigned int flags, state;
unsigned seq, m_seq, r_seq;
unsigned seq, next_seq, m_seq, r_seq;
int last_type;
unsigned depth;
int total_link_count;
......@@ -665,6 +665,13 @@ static void drop_links(struct nameidata *nd)
}
}
static void leave_rcu(struct nameidata *nd)
{
nd->flags &= ~LOOKUP_RCU;
nd->seq = nd->next_seq = 0;
rcu_read_unlock();
}
static void terminate_walk(struct nameidata *nd)
{
drop_links(nd);
......@@ -678,8 +685,7 @@ static void terminate_walk(struct nameidata *nd)
nd->state &= ~ND_ROOT_GRABBED;
}
} else {
nd->flags &= ~LOOKUP_RCU;
rcu_read_unlock();
leave_rcu(nd);
}
nd->depth = 0;
nd->path.mnt = NULL;
......@@ -765,14 +771,13 @@ static bool try_to_unlazy(struct nameidata *nd)
BUG_ON(!(nd->flags & LOOKUP_RCU));
nd->flags &= ~LOOKUP_RCU;
if (unlikely(!legitimize_links(nd)))
goto out1;
if (unlikely(!legitimize_path(nd, &nd->path, nd->seq)))
goto out;
if (unlikely(!legitimize_root(nd)))
goto out;
rcu_read_unlock();
leave_rcu(nd);
BUG_ON(nd->inode != parent->d_inode);
return true;
......@@ -780,7 +785,7 @@ static bool try_to_unlazy(struct nameidata *nd)
nd->path.mnt = NULL;
nd->path.dentry = NULL;
out:
rcu_read_unlock();
leave_rcu(nd);
return false;
}
......@@ -788,7 +793,6 @@ static bool try_to_unlazy(struct nameidata *nd)
* try_to_unlazy_next - try to switch to ref-walk mode.
* @nd: nameidata pathwalk data
* @dentry: next dentry to step into
* @seq: seq number to check @dentry against
* Returns: true on success, false on failure
*
* Similar to try_to_unlazy(), but here we have the next dentry already
......@@ -797,15 +801,19 @@ static bool try_to_unlazy(struct nameidata *nd)
* Nothing should touch nameidata between try_to_unlazy_next() failure and
* terminate_walk().
*/
static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsigned seq)
static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry)
{
int res;
BUG_ON(!(nd->flags & LOOKUP_RCU));
nd->flags &= ~LOOKUP_RCU;
if (unlikely(!legitimize_links(nd)))
goto out2;
if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq)))
goto out2;
res = __legitimize_mnt(nd->path.mnt, nd->m_seq);
if (unlikely(res)) {
if (res > 0)
goto out2;
goto out1;
}
if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref)))
goto out1;
......@@ -818,7 +826,7 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsi
*/
if (unlikely(!lockref_get_not_dead(&dentry->d_lockref)))
goto out;
if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
if (read_seqcount_retry(&dentry->d_seq, nd->next_seq))
goto out_dput;
/*
* Sequence counts matched. Now make sure that the root is
......@@ -826,7 +834,7 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsi
*/
if (unlikely(!legitimize_root(nd)))
goto out_dput;
rcu_read_unlock();
leave_rcu(nd);
return true;
out2:
......@@ -834,10 +842,10 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry, unsi
out1:
nd->path.dentry = NULL;
out:
rcu_read_unlock();
leave_rcu(nd);
return false;
out_dput:
rcu_read_unlock();
leave_rcu(nd);
dput(dentry);
return false;
}
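The new error handling above leans on __legitimize_mnt()'s three-way result. Going by fs/namespace.c (background knowledge, not something this hunk itself states): 0 means the mount reference was taken, a positive value means legitimization failed with no reference held, and a negative value means a reference was taken but the mount is on its way out, so somebody still has to drop it. That is why res > 0 jumps to out2, which clears nd->path.mnt so nothing will put it, while the negative case goes to out1 and leaves nd->path.mnt in place for terminate_walk() to drop, instead of the rcu_read_unlock()/mntput()/rcu_read_lock() round trip legitimize_mnt() would have done internally. A compact sketch of that caller-side split, with illustrative types:

#include <stdbool.h>
#include <stddef.h>

/* Illustrative stand-ins only; not kernel types. */
struct mnt;
struct dentry_ref;

struct walk_path {
	struct mnt *mnt;            /* models nd->path.mnt */
	struct dentry_ref *dentry;  /* models nd->path.dentry */
};

/*
 * Assumed contract of the legitimization step:
 *   res == 0  -> success, mount reference taken
 *   res  > 0  -> failure, no reference held: forget the mount entirely
 *   res  < 0  -> failure, a reference is held: leave it in the path so
 *                the eventual cleanup (terminate_walk()) drops it
 */
static bool handle_legitimize_result(struct walk_path *path, int res)
{
	if (res == 0)
		return true;
	if (res > 0)
		path->mnt = NULL;   /* the "out2" case */
	path->dentry = NULL;        /* no dentry reference was taken yet */
	return false;
}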
......@@ -962,7 +970,7 @@ static int nd_jump_root(struct nameidata *nd)
d = nd->path.dentry;
nd->inode = d->d_inode;
nd->seq = nd->root_seq;
if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq)))
if (read_seqcount_retry(&d->d_seq, nd->seq))
return -ECHILD;
} else {
path_put(&nd->path);
......@@ -1466,8 +1474,7 @@ EXPORT_SYMBOL(follow_down);
* Try to skip to top of mountpoint pile in rcuwalk mode. Fail if
* we meet a managed dentry that would need blocking.
*/
static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
struct inode **inode, unsigned *seqp)
static bool __follow_mount_rcu(struct nameidata *nd, struct path *path)
{
struct dentry *dentry = path->dentry;
unsigned int flags = dentry->d_flags;
......@@ -1496,15 +1503,12 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
path->mnt = &mounted->mnt;
dentry = path->dentry = mounted->mnt.mnt_root;
nd->state |= ND_JUMPED;
*seqp = read_seqcount_begin(&dentry->d_seq);
*inode = dentry->d_inode;
/*
* We don't need to re-check ->d_seq after this
* ->d_inode read - there will be an RCU delay
* between mount hash removal and ->mnt_root
* becoming unpinned.
*/
nd->next_seq = read_seqcount_begin(&dentry->d_seq);
flags = dentry->d_flags;
// makes sure that non-RCU pathwalk could reach
// this state.
if (read_seqretry(&mount_lock, nd->m_seq))
return false;
continue;
}
if (read_seqretry(&mount_lock, nd->m_seq))
......@@ -1515,8 +1519,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
}
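The comment removed from __follow_mount_rcu() argued that the ->d_inode read there needed no ->d_seq re-check because of the RCU delay between mount-hash removal and ->mnt_root becoming unpinned; with the inode fetch now deferred to step_into(), the function instead re-samples the new root's ->d_seq into nd->next_seq and re-checks mount_lock after every hop, bailing out of RCU mode if the mount topology changed, so that a non-RCU retry could still reach the same spot. A toy model of that per-hop revalidation, with made-up names:

#include <stdatomic.h>
#include <stdbool.h>

/* Illustrative model, not kernel code: m_seq is the mount_lock seqcount
 * sampled once when the walk entered RCU mode; after every hop onto a
 * mounted filesystem's root it is re-read, and any change means a mount
 * or unmount raced with us and the RCU walk gives up. */
struct mount_table {
	atomic_uint mount_lock_seq;
};

struct rcu_walk {
	unsigned m_seq;      /* sampled at walk start */
	unsigned next_seq;   /* ->d_seq sample of the dentry being stepped onto */
};

static bool mounts_unchanged(struct mount_table *tbl, unsigned m_seq)
{
	return atomic_load_explicit(&tbl->mount_lock_seq,
				    memory_order_acquire) == m_seq;
}

/* after switching to the mounted filesystem's root dentry: stash the new
 * root's seq sample, then confirm the mount table is still the one we saw
 * at the start of the walk */
static bool hop_onto_mounted_root(struct mount_table *tbl, struct rcu_walk *w,
				  unsigned root_d_seq)
{
	w->next_seq = root_d_seq;
	return mounts_unchanged(tbl, w->m_seq);  /* false => unlazy or -ECHILD */
}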
static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
struct path *path, struct inode **inode,
unsigned int *seqp)
struct path *path)
{
bool jumped;
int ret;
......@@ -1524,16 +1527,15 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
path->mnt = nd->path.mnt;
path->dentry = dentry;
if (nd->flags & LOOKUP_RCU) {
unsigned int seq = *seqp;
if (unlikely(!*inode))
return -ENOENT;
if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
unsigned int seq = nd->next_seq;
if (likely(__follow_mount_rcu(nd, path)))
return 0;
if (!try_to_unlazy_next(nd, dentry, seq))
return -ECHILD;
// *path might've been clobbered by __follow_mount_rcu()
// *path and nd->next_seq might've been clobbered
path->mnt = nd->path.mnt;
path->dentry = dentry;
nd->next_seq = seq;
if (!try_to_unlazy_next(nd, dentry))
return -ECHILD;
}
ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
if (jumped) {
......@@ -1546,9 +1548,6 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
dput(path->dentry);
if (path->mnt != nd->path.mnt)
mntput(path->mnt);
} else {
*inode = d_backing_inode(path->dentry);
*seqp = 0; /* out of RCU mode, so the value doesn't matter */
}
return ret;
}
......@@ -1607,9 +1606,7 @@ static struct dentry *__lookup_hash(const struct qstr *name,
return dentry;
}
static struct dentry *lookup_fast(struct nameidata *nd,
struct inode **inode,
unsigned *seqp)
static struct dentry *lookup_fast(struct nameidata *nd)
{
struct dentry *dentry, *parent = nd->path.dentry;
int status = 1;
......@@ -1620,37 +1617,24 @@ static struct dentry *lookup_fast(struct nameidata *nd,
* going to fall back to non-racy lookup.
*/
if (nd->flags & LOOKUP_RCU) {
unsigned seq;
dentry = __d_lookup_rcu(parent, &nd->last, &seq);
dentry = __d_lookup_rcu(parent, &nd->last, &nd->next_seq);
if (unlikely(!dentry)) {
if (!try_to_unlazy(nd))
return ERR_PTR(-ECHILD);
return NULL;
}
/*
* This sequence count validates that the inode matches
* the dentry name information from lookup.
*/
*inode = d_backing_inode(dentry);
if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
return ERR_PTR(-ECHILD);
/*
* This sequence count validates that the parent had no
* changes while we did the lookup of the dentry above.
*
* The memory barrier in read_seqcount_begin of child is
* enough, we can use __read_seqcount_retry here.
*/
if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq)))
if (read_seqcount_retry(&parent->d_seq, nd->seq))
return ERR_PTR(-ECHILD);
*seqp = seq;
status = d_revalidate(dentry, nd->flags);
if (likely(status > 0))
return dentry;
if (!try_to_unlazy_next(nd, dentry, seq))
if (!try_to_unlazy_next(nd, dentry))
return ERR_PTR(-ECHILD);
if (status == -ECHILD)
/* we'd been told to redo it in non-rcu mode */
......@@ -1731,7 +1715,7 @@ static inline int may_lookup(struct user_namespace *mnt_userns,
return inode_permission(mnt_userns, nd->inode, MAY_EXEC);
}
static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
static int reserve_stack(struct nameidata *nd, struct path *link)
{
if (unlikely(nd->total_link_count++ >= MAXSYMLINKS))
return -ELOOP;
......@@ -1746,7 +1730,7 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
if (nd->flags & LOOKUP_RCU) {
// we need to grab link before we do unlazy. And we can't skip
// unlazy even if we fail to grab the link - cleanup needs it
bool grabbed_link = legitimize_path(nd, link, seq);
bool grabbed_link = legitimize_path(nd, link, nd->next_seq);
if (!try_to_unlazy(nd) || !grabbed_link)
return -ECHILD;
......@@ -1760,11 +1744,11 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};
static const char *pick_link(struct nameidata *nd, struct path *link,
struct inode *inode, unsigned seq, int flags)
struct inode *inode, int flags)
{
struct saved *last;
const char *res;
int error = reserve_stack(nd, link, seq);
int error = reserve_stack(nd, link);
if (unlikely(error)) {
if (!(nd->flags & LOOKUP_RCU))
......@@ -1774,7 +1758,7 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
last = nd->stack + nd->depth++;
last->link = *link;
clear_delayed_call(&last->done);
last->seq = seq;
last->seq = nd->next_seq;
if (flags & WALK_TRAILING) {
error = may_follow_link(nd, inode);
......@@ -1836,43 +1820,50 @@ static const char *pick_link(struct nameidata *nd, struct path *link,
* to do this check without having to look at inode->i_op,
* so we keep a cache of "no, this doesn't need follow_link"
* for the common case.
*
* NOTE: dentry must be what nd->next_seq had been sampled from.
*/
static const char *step_into(struct nameidata *nd, int flags,
struct dentry *dentry, struct inode *inode, unsigned seq)
struct dentry *dentry)
{
struct path path;
int err = handle_mounts(nd, dentry, &path, &inode, &seq);
struct inode *inode;
int err = handle_mounts(nd, dentry, &path);
if (err < 0)
return ERR_PTR(err);
inode = path.dentry->d_inode;
if (likely(!d_is_symlink(path.dentry)) ||
((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) ||
(flags & WALK_NOFOLLOW)) {
/* not a symlink or should not follow */
if (!(nd->flags & LOOKUP_RCU)) {
if (nd->flags & LOOKUP_RCU) {
if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
return ERR_PTR(-ECHILD);
if (unlikely(!inode))
return ERR_PTR(-ENOENT);
} else {
dput(nd->path.dentry);
if (nd->path.mnt != path.mnt)
mntput(nd->path.mnt);
}
nd->path = path;
nd->inode = inode;
nd->seq = seq;
nd->seq = nd->next_seq;
return NULL;
}
if (nd->flags & LOOKUP_RCU) {
/* make sure that d_is_symlink above matches inode */
if (read_seqcount_retry(&path.dentry->d_seq, seq))
if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
return ERR_PTR(-ECHILD);
} else {
if (path.mnt == nd->path.mnt)
mntget(path.mnt);
}
return pick_link(nd, &path, inode, seq, flags);
return pick_link(nd, &path, inode, flags);
}
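The NOTE added above step_into() is the new contract: whoever passes a dentry in must have left that dentry's ->d_seq sample in nd->next_seq. lookup_fast(), __follow_mount_rcu() and follow_dotdot_rcu() do the sampling themselves; callers that step onto the dentry the walk is already standing on (handle_lookup_down(), and ".." hitting the root in follow_dotdot_rcu()) simply copy nd->seq into nd->next_seq first. A tiny sketch of that caller-side rule, reusing the illustrative walk_ctx shape from the sketch near the top (redeclared here so the snippet stands alone):

/* Illustrative only; mirrors the invariant, not the kernel API. */
struct walk_ctx {
	unsigned seq;        /* nd->seq: sample for the current position */
	unsigned next_seq;   /* nd->next_seq: must belong to the dentry passed on */
};

/* stepping onto a freshly looked-up child: the lookup already stored the
 * child's ->d_seq sample, so there is nothing extra to do */
static void prime_for_child(struct walk_ctx *ctx, unsigned child_seq)
{
	ctx->next_seq = child_seq;
}

/* stepping onto the dentry we are already positioned at (handle_lookup_down(),
 * ".." at the walk's root): seed next_seq from the current sample */
static void prime_for_current(struct walk_ctx *ctx)
{
	ctx->next_seq = ctx->seq;
}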
static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
struct inode **inodep,
unsigned *seqp)
static struct dentry *follow_dotdot_rcu(struct nameidata *nd)
{
struct dentry *parent, *old;
......@@ -1889,30 +1880,30 @@ static struct dentry *follow_dotdot_rcu(struct nameidata *nd,
nd->path = path;
nd->inode = path.dentry->d_inode;
nd->seq = seq;
if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
// makes sure that non-RCU pathwalk could reach this state
if (read_seqretry(&mount_lock, nd->m_seq))
return ERR_PTR(-ECHILD);
/* we know that mountpoint was pinned */
}
old = nd->path.dentry;
parent = old->d_parent;
*inodep = parent->d_inode;
*seqp = read_seqcount_begin(&parent->d_seq);
if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
nd->next_seq = read_seqcount_begin(&parent->d_seq);
// makes sure that non-RCU pathwalk could reach this state
if (read_seqcount_retry(&old->d_seq, nd->seq))
return ERR_PTR(-ECHILD);
if (unlikely(!path_connected(nd->path.mnt, parent)))
return ERR_PTR(-ECHILD);
return parent;
in_root:
if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
if (read_seqretry(&mount_lock, nd->m_seq))
return ERR_PTR(-ECHILD);
if (unlikely(nd->flags & LOOKUP_BENEATH))
return ERR_PTR(-ECHILD);
return NULL;
nd->next_seq = nd->seq;
return nd->path.dentry;
}
static struct dentry *follow_dotdot(struct nameidata *nd,
struct inode **inodep,
unsigned *seqp)
static struct dentry *follow_dotdot(struct nameidata *nd)
{
struct dentry *parent;
......@@ -1936,15 +1927,12 @@ static struct dentry *follow_dotdot(struct nameidata *nd,
dput(parent);
return ERR_PTR(-ENOENT);
}
*seqp = 0;
*inodep = parent->d_inode;
return parent;
in_root:
if (unlikely(nd->flags & LOOKUP_BENEATH))
return ERR_PTR(-EXDEV);
dget(nd->path.dentry);
return NULL;
return dget(nd->path.dentry);
}
static const char *handle_dots(struct nameidata *nd, int type)
......@@ -1952,8 +1940,6 @@ static const char *handle_dots(struct nameidata *nd, int type)
if (type == LAST_DOTDOT) {
const char *error = NULL;
struct dentry *parent;
struct inode *inode;
unsigned seq;
if (!nd->root.mnt) {
error = ERR_PTR(set_root(nd));
......@@ -1961,17 +1947,12 @@ static const char *handle_dots(struct nameidata *nd, int type)
return error;
}
if (nd->flags & LOOKUP_RCU)
parent = follow_dotdot_rcu(nd, &inode, &seq);
parent = follow_dotdot_rcu(nd);
else
parent = follow_dotdot(nd, &inode, &seq);
parent = follow_dotdot(nd);
if (IS_ERR(parent))
return ERR_CAST(parent);
if (unlikely(!parent))
error = step_into(nd, WALK_NOFOLLOW,
nd->path.dentry, nd->inode, nd->seq);
else
error = step_into(nd, WALK_NOFOLLOW,
parent, inode, seq);
error = step_into(nd, WALK_NOFOLLOW, parent);
if (unlikely(error))
return error;
......@@ -1983,9 +1964,9 @@ static const char *handle_dots(struct nameidata *nd, int type)
* some fallback).
*/
smp_rmb();
if (unlikely(__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq)))
if (__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq))
return ERR_PTR(-EAGAIN);
if (unlikely(__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq)))
if (__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq))
return ERR_PTR(-EAGAIN);
}
}
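The tail of handle_dots() (its comment is truncated in this hunk; only "some fallback)" is visible) re-checks both mount_lock and rename_lock after an smp_rmb(); the change here merely drops the unlikely() annotations, in line with the "get rid of pointless unlikely(read_seqcount_retry(...))" patch in the series. What the surrounding code does show is the shape: both seqcounts are sampled earlier in the walk (nd->m_seq, nd->r_seq), and after a ".." step neither may have moved, otherwise -EAGAIN is returned so the caller can retry. A small model of that barrier-then-double-recheck shape (illustrative, not the kernel's seqcount API):

#include <stdatomic.h>
#include <stdbool.h>

/* Illustrative model: two global seqcounts sampled at walk start are both
 * re-read after a read barrier; if either moved, a mount or rename may have
 * raced with the ".." step and the caller returns -EAGAIN to retry. */
struct name_globals {
	atomic_uint mount_seq;
	atomic_uint rename_seq;
};

static bool dotdot_still_valid(struct name_globals *g,
			       unsigned m_sample, unsigned r_sample)
{
	atomic_thread_fence(memory_order_acquire);   /* stands in for smp_rmb() */
	if (atomic_load_explicit(&g->mount_seq, memory_order_relaxed) != m_sample)
		return false;
	if (atomic_load_explicit(&g->rename_seq, memory_order_relaxed) != r_sample)
		return false;
	return true;
}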
......@@ -1995,8 +1976,6 @@ static const char *handle_dots(struct nameidata *nd, int type)
static const char *walk_component(struct nameidata *nd, int flags)
{
struct dentry *dentry;
struct inode *inode;
unsigned seq;
/*
* "." and ".." are special - ".." especially so because it has
* to be able to know about the current root directory and
......@@ -2007,7 +1986,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
put_link(nd);
return handle_dots(nd, nd->last_type);
}
dentry = lookup_fast(nd, &inode, &seq);
dentry = lookup_fast(nd);
if (IS_ERR(dentry))
return ERR_CAST(dentry);
if (unlikely(!dentry)) {
......@@ -2017,7 +1996,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
}
if (!(flags & WALK_MORE) && nd->depth)
put_link(nd);
return step_into(nd, flags, dentry, inode, seq);
return step_into(nd, flags, dentry);
}
/*
......@@ -2372,6 +2351,8 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
flags &= ~LOOKUP_RCU;
if (flags & LOOKUP_RCU)
rcu_read_lock();
else
nd->seq = nd->next_seq = 0;
nd->flags = flags;
nd->state |= ND_JUMPED;
......@@ -2473,8 +2454,8 @@ static int handle_lookup_down(struct nameidata *nd)
{
if (!(nd->flags & LOOKUP_RCU))
dget(nd->path.dentry);
return PTR_ERR(step_into(nd, WALK_NOFOLLOW,
nd->path.dentry, nd->inode, nd->seq));
nd->next_seq = nd->seq;
return PTR_ERR(step_into(nd, WALK_NOFOLLOW, nd->path.dentry));
}
/* Returns 0 and nd will be valid on success; Returns error, otherwise. */
......@@ -3393,8 +3374,6 @@ static const char *open_last_lookups(struct nameidata *nd,
struct dentry *dir = nd->path.dentry;
int open_flag = op->open_flag;
bool got_write = false;
unsigned seq;
struct inode *inode;
struct dentry *dentry;
const char *res;
......@@ -3410,7 +3389,7 @@ static const char *open_last_lookups(struct nameidata *nd,
if (nd->last.name[nd->last.len])
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
/* we _can_ be in RCU mode here */
dentry = lookup_fast(nd, &inode, &seq);
dentry = lookup_fast(nd);
if (IS_ERR(dentry))
return ERR_CAST(dentry);
if (likely(dentry))
......@@ -3464,7 +3443,7 @@ static const char *open_last_lookups(struct nameidata *nd,
finish_lookup:
if (nd->depth)
put_link(nd);
res = step_into(nd, WALK_TRAILING, dentry, inode, seq);
res = step_into(nd, WALK_TRAILING, dentry);
if (unlikely(res))
nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
return res;
......
......@@ -648,7 +648,7 @@ int __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
}
/* call under rcu_read_lock */
bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
static bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
{
int res = __legitimize_mnt(bastard, seq);
if (likely(!res))
......