Commit 85d217f4 authored by Alexander Viro, committed by Linus Torvalds

[PATCH] Re: 2.5.11 breakage

	OK, here it comes.  The patch below is an attempt to do the fastwalk
stuff the right way, and so far it seems to be working.

 - dentry leak is plugged
 - locked/unlocked state of nameidata doesn't depend on history - it
   depends only on the point in the code.
 - LOOKUP_LOCKED is gone.
 - following mounts and ".." doesn't drop dcache_lock
 - light-weight permission check distinguishes between "don't know" and
   "permission denied", so we don't call full-blown permission() unless
   we have to.
 - code that changes root/pwd holds dcache_lock _and_ write lock on
   current->fs->lock.  I.e. if we hold dcache_lock we can safely
   access our ->fs->{root,pwd}{,mnt}
 - __d_lookup() does not increment refcount; callers do dget_locked()
   if they need it (behaviour of d_lookup() didn't change, obviously).
 - link_path_walk() logic has been (somewhat) cleaned up.
parent 7ca32047
...@@ -849,6 +849,8 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name) ...@@ -849,6 +849,8 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
struct dentry * dentry; struct dentry * dentry;
spin_lock(&dcache_lock); spin_lock(&dcache_lock);
dentry = __d_lookup(parent,name); dentry = __d_lookup(parent,name);
if (dentry)
__dget_locked(dentry);
spin_unlock(&dcache_lock); spin_unlock(&dcache_lock);
return dentry; return dentry;
} }
...@@ -881,7 +883,6 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) ...@@ -881,7 +883,6 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
if (memcmp(dentry->d_name.name, str, len)) if (memcmp(dentry->d_name.name, str, len))
continue; continue;
} }
__dget_locked(dentry);
dentry->d_vfs_flags |= DCACHE_REFERENCED; dentry->d_vfs_flags |= DCACHE_REFERENCED;
return dentry; return dentry;
} }
......
...@@ -279,36 +279,24 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, ...@@ -279,36 +279,24 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name,
} }
/*for fastwalking*/ /*for fastwalking*/
static inline void undo_locked(struct nameidata *nd) static inline void unlock_nd(struct nameidata *nd)
{ {
if(nd->flags & LOOKUP_LOCKED){ struct vfsmount *mnt = nd->old_mnt;
dget_locked(nd->dentry); struct dentry *dentry = nd->old_dentry;
mntget(nd->mnt); mntget(nd->mnt);
spin_unlock(&dcache_lock); dget_locked(nd->dentry);
nd->flags &= ~LOOKUP_LOCKED; nd->old_mnt = NULL;
} nd->old_dentry = NULL;
spin_unlock(&dcache_lock);
dput(dentry);
mntput(mnt);
} }
/* static inline void lock_nd(struct nameidata *nd)
* For fast path lookup while holding the dcache_lock.
* SMP-safe
*/
static struct dentry * cached_lookup_nd(struct nameidata * nd, struct qstr * name, int flags)
{ {
struct dentry * dentry = NULL; spin_lock(&dcache_lock);
if(!(nd->flags & LOOKUP_LOCKED)) nd->old_mnt = nd->mnt;
return cached_lookup(nd->dentry, name, flags); nd->old_dentry = nd->dentry;
dentry = __d_lookup(nd->dentry, name);
if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
undo_locked(nd);
if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
dput(dentry);
dentry = NULL;
}
}
return dentry;
} }
/* /*
...@@ -326,7 +314,7 @@ static inline int exec_permission_lite(struct inode *inode) ...@@ -326,7 +314,7 @@ static inline int exec_permission_lite(struct inode *inode)
umode_t mode = inode->i_mode; umode_t mode = inode->i_mode;
if ((inode->i_op && inode->i_op->permission)) if ((inode->i_op && inode->i_op->permission))
return -EACCES; return -EAGAIN;
if (current->fsuid == inode->i_uid) if (current->fsuid == inode->i_uid)
mode >>= 6; mode >>= 6;
...@@ -418,10 +406,10 @@ static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) ...@@ -418,10 +406,10 @@ static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
return -ELOOP; return -ELOOP;
} }
static inline int __follow_up(struct vfsmount **mnt, struct dentry **base) int follow_up(struct vfsmount **mnt, struct dentry **dentry)
{ {
struct vfsmount *parent; struct vfsmount *parent;
struct dentry *dentry; struct dentry *mountpoint;
spin_lock(&dcache_lock); spin_lock(&dcache_lock);
parent=(*mnt)->mnt_parent; parent=(*mnt)->mnt_parent;
if (parent == *mnt) { if (parent == *mnt) {
...@@ -429,18 +417,27 @@ static inline int __follow_up(struct vfsmount **mnt, struct dentry **base) ...@@ -429,18 +417,27 @@ static inline int __follow_up(struct vfsmount **mnt, struct dentry **base)
return 0; return 0;
} }
mntget(parent); mntget(parent);
dentry=dget((*mnt)->mnt_mountpoint); mountpoint=dget((*mnt)->mnt_mountpoint);
spin_unlock(&dcache_lock); spin_unlock(&dcache_lock);
dput(*base); dput(*dentry);
*base = dentry; *dentry = mountpoint;
mntput(*mnt); mntput(*mnt);
*mnt = parent; *mnt = parent;
return 1; return 1;
} }
int follow_up(struct vfsmount **mnt, struct dentry **dentry) static int follow_mount(struct vfsmount **mnt, struct dentry **dentry)
{ {
return __follow_up(mnt, dentry); int res = 0;
while (d_mountpoint(*dentry)) {
struct vfsmount *mounted = lookup_mnt(*mnt, *dentry);
if (!mounted)
break;
*mnt = mounted;
*dentry = mounted->mnt_root;
res = 1;
}
return res;
} }
static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry) static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
...@@ -466,41 +463,83 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry) ...@@ -466,41 +463,83 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry)
return __follow_down(mnt,dentry); return __follow_down(mnt,dentry);
} }
static inline void follow_dotdot(struct nameidata *nd) static inline void follow_dotdot(struct vfsmount **mnt, struct dentry **dentry)
{ {
while(1) { while(1) {
struct vfsmount *parent; struct vfsmount *parent;
struct dentry *dentry; if (*dentry == current->fs->root &&
read_lock(&current->fs->lock); *mnt == current->fs->rootmnt)
if (nd->dentry == current->fs->root &&
nd->mnt == current->fs->rootmnt) {
read_unlock(&current->fs->lock);
break; break;
} if (*dentry != (*mnt)->mnt_root) {
read_unlock(&current->fs->lock); *dentry = (*dentry)->d_parent;
spin_lock(&dcache_lock);
if (nd->dentry != nd->mnt->mnt_root) {
dentry = dget(nd->dentry->d_parent);
spin_unlock(&dcache_lock);
dput(nd->dentry);
nd->dentry = dentry;
break; break;
} }
parent=nd->mnt->mnt_parent; parent=(*mnt)->mnt_parent;
if (parent == nd->mnt) { if (parent == *mnt)
spin_unlock(&dcache_lock);
break; break;
} *dentry=(*mnt)->mnt_mountpoint;
mntget(parent); *mnt = parent;
dentry=dget(nd->mnt->mnt_mountpoint);
spin_unlock(&dcache_lock);
dput(nd->dentry);
nd->dentry = dentry;
mntput(nd->mnt);
nd->mnt = parent;
} }
while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry)) follow_mount(mnt, dentry);
; }
struct path {
struct vfsmount *mnt;
struct dentry *dentry;
};
/*
* It's more convoluted than I'd like it to be, but... it's still fairly
* small and for now I'd prefer to have fast path as straight as possible.
* It _is_ time-critical.
*/
static int do_lookup(struct nameidata *nd, struct qstr *name,
struct path *path, struct path *cached_path,
int flags)
{
struct vfsmount *mnt = nd->mnt;
struct dentry *dentry = __d_lookup(nd->dentry, name);
if (!dentry)
goto dcache_miss;
if (dentry->d_op && dentry->d_op->d_revalidate)
goto need_revalidate;
done:
path->mnt = mnt;
path->dentry = dentry;
return 0;
dcache_miss:
unlock_nd(nd);
need_lookup:
dentry = real_lookup(nd->dentry, name, LOOKUP_CONTINUE);
if (IS_ERR(dentry))
goto fail;
mntget(mnt);
relock:
dput(cached_path->dentry);
mntput(cached_path->mnt);
cached_path->mnt = mnt;
cached_path->dentry = dentry;
lock_nd(nd);
goto done;
need_revalidate:
mntget(mnt);
dget_locked(dentry);
unlock_nd(nd);
if (dentry->d_op->d_revalidate(dentry, flags))
goto relock;
if (d_invalidate(dentry))
goto relock;
dput(dentry);
mntput(mnt);
goto need_lookup;
fail:
lock_nd(nd);
return PTR_ERR(dentry);
} }
/* /*
...@@ -513,11 +552,11 @@ static inline void follow_dotdot(struct nameidata *nd) ...@@ -513,11 +552,11 @@ static inline void follow_dotdot(struct nameidata *nd)
*/ */
int link_path_walk(const char * name, struct nameidata *nd) int link_path_walk(const char * name, struct nameidata *nd)
{ {
struct dentry *dentry; struct path next, pinned = {NULL, NULL};
struct inode *inode; struct inode *inode;
int err; int err;
unsigned int lookup_flags = nd->flags; unsigned int lookup_flags = nd->flags;
while (*name=='/') while (*name=='/')
name++; name++;
if (!*name) if (!*name)
...@@ -534,11 +573,11 @@ int link_path_walk(const char * name, struct nameidata *nd) ...@@ -534,11 +573,11 @@ int link_path_walk(const char * name, struct nameidata *nd)
unsigned int c; unsigned int c;
err = exec_permission_lite(inode); err = exec_permission_lite(inode);
if(err){ if (err == -EAGAIN) {
undo_locked(nd); unlock_nd(nd);
err = permission(inode, MAY_EXEC); err = permission(inode, MAY_EXEC);
lock_nd(nd);
} }
dentry = ERR_PTR(err);
if (err) if (err)
break; break;
...@@ -572,8 +611,7 @@ int link_path_walk(const char * name, struct nameidata *nd) ...@@ -572,8 +611,7 @@ int link_path_walk(const char * name, struct nameidata *nd)
case 2: case 2:
if (this.name[1] != '.') if (this.name[1] != '.')
break; break;
undo_locked(nd); follow_dotdot(&nd->mnt, &nd->dentry);
follow_dotdot(nd);
inode = nd->dentry->d_inode; inode = nd->dentry->d_inode;
/* fallthrough */ /* fallthrough */
case 1: case 1:
...@@ -589,35 +627,30 @@ int link_path_walk(const char * name, struct nameidata *nd) ...@@ -589,35 +627,30 @@ int link_path_walk(const char * name, struct nameidata *nd)
break; break;
} }
/* This does the actual lookups.. */ /* This does the actual lookups.. */
dentry = cached_lookup_nd(nd, &this, LOOKUP_CONTINUE); err = do_lookup(nd, &this, &next, &pinned, LOOKUP_CONTINUE);
if (!dentry) { if (err)
undo_locked(nd); break;
dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
break;
}
/* Check mountpoints.. */ /* Check mountpoints.. */
if(d_mountpoint(dentry)){ follow_mount(&next.mnt, &next.dentry);
undo_locked(nd);
while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
;
}
err = -ENOENT; err = -ENOENT;
inode = dentry->d_inode; inode = next.dentry->d_inode;
if (!inode) if (!inode)
goto out_dput; break;
err = -ENOTDIR; err = -ENOTDIR;
if (!inode->i_op) if (!inode->i_op)
goto out_dput; break;
if (inode->i_op->follow_link) { if (inode->i_op->follow_link) {
undo_locked(nd); mntget(next.mnt);
err = do_follow_link(dentry, nd); dget_locked(next.dentry);
dput(dentry); unlock_nd(nd);
err = do_follow_link(next.dentry, nd);
dput(next.dentry);
mntput(next.mnt);
if (err) if (err)
goto return_err; goto return_err;
lock_nd(nd);
err = -ENOENT; err = -ENOENT;
inode = nd->dentry->d_inode; inode = nd->dentry->d_inode;
if (!inode) if (!inode)
...@@ -626,9 +659,8 @@ int link_path_walk(const char * name, struct nameidata *nd) ...@@ -626,9 +659,8 @@ int link_path_walk(const char * name, struct nameidata *nd)
if (!inode->i_op) if (!inode->i_op)
break; break;
} else { } else {
if (!(nd->flags & LOOKUP_LOCKED)) nd->mnt = next.mnt;
dput(nd->dentry); nd->dentry = next.dentry;
nd->dentry = dentry;
} }
err = -ENOTDIR; err = -ENOTDIR;
if (!inode->i_op->lookup) if (!inode->i_op->lookup)
...@@ -647,8 +679,7 @@ int link_path_walk(const char * name, struct nameidata *nd) ...@@ -647,8 +679,7 @@ int link_path_walk(const char * name, struct nameidata *nd)
case 2: case 2:
if (this.name[1] != '.') if (this.name[1] != '.')
break; break;
undo_locked(nd); follow_dotdot(&nd->mnt, &nd->dentry);
follow_dotdot(nd);
inode = nd->dentry->d_inode; inode = nd->dentry->d_inode;
/* fallthrough */ /* fallthrough */
case 1: case 1:
...@@ -659,27 +690,26 @@ int link_path_walk(const char * name, struct nameidata *nd) ...@@ -659,27 +690,26 @@ int link_path_walk(const char * name, struct nameidata *nd)
if (err < 0) if (err < 0)
break; break;
} }
dentry = cached_lookup_nd(nd, &this, 0); err = do_lookup(nd, &this, &next, &pinned, 0);
undo_locked(nd); if (err)
if (!dentry) { break;
dentry = real_lookup(nd->dentry, &this, 0); follow_mount(&next.mnt, &next.dentry);
err = PTR_ERR(dentry); inode = next.dentry->d_inode;
if (IS_ERR(dentry))
break;
}
while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
;
inode = dentry->d_inode;
if ((lookup_flags & LOOKUP_FOLLOW) if ((lookup_flags & LOOKUP_FOLLOW)
&& inode && inode->i_op && inode->i_op->follow_link) { && inode && inode->i_op && inode->i_op->follow_link) {
err = do_follow_link(dentry, nd); mntget(next.mnt);
dput(dentry); dget_locked(next.dentry);
unlock_nd(nd);
err = do_follow_link(next.dentry, nd);
dput(next.dentry);
mntput(next.mnt);
if (err) if (err)
goto return_err; goto return_err;
inode = nd->dentry->d_inode; inode = nd->dentry->d_inode;
lock_nd(nd);
} else { } else {
dput(nd->dentry); nd->mnt = next.mnt;
nd->dentry = dentry; nd->dentry = next.dentry;
} }
err = -ENOENT; err = -ENOENT;
if (!inode) if (!inode)
...@@ -700,22 +730,23 @@ int link_path_walk(const char * name, struct nameidata *nd) ...@@ -700,22 +730,23 @@ int link_path_walk(const char * name, struct nameidata *nd)
else if (this.len == 2 && this.name[1] == '.') else if (this.len == 2 && this.name[1] == '.')
nd->last_type = LAST_DOTDOT; nd->last_type = LAST_DOTDOT;
return_base: return_base:
undo_locked(nd); unlock_nd(nd);
dput(pinned.dentry);
mntput(pinned.mnt);
return 0; return 0;
out_dput:
undo_locked(nd);
dput(dentry);
break;
} }
undo_locked(nd); unlock_nd(nd);
path_release(nd); path_release(nd);
return_err: return_err:
dput(pinned.dentry);
mntput(pinned.mnt);
return err; return err;
} }
int path_walk(const char * name, struct nameidata *nd) int path_walk(const char * name, struct nameidata *nd)
{ {
current->total_link_count = 0; current->total_link_count = 0;
lock_nd(nd);
return link_path_walk(name, nd); return link_path_walk(name, nd);
} }
...@@ -803,6 +834,8 @@ walk_init_root(const char *name, struct nameidata *nd) ...@@ -803,6 +834,8 @@ walk_init_root(const char *name, struct nameidata *nd)
int path_init(const char *name, unsigned int flags, struct nameidata *nd) int path_init(const char *name, unsigned int flags, struct nameidata *nd)
{ {
nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->old_mnt = NULL;
nd->old_dentry = NULL;
nd->flags = flags; nd->flags = flags;
if (*name=='/') if (*name=='/')
return walk_init_root(name,nd); return walk_init_root(name,nd);
...@@ -817,7 +850,7 @@ int path_lookup(const char *name, unsigned int flags, struct nameidata *nd) ...@@ -817,7 +850,7 @@ int path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
{ {
nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags; nd->flags = flags;
if (*name=='/'){ if (*name=='/') {
read_lock(&current->fs->lock); read_lock(&current->fs->lock);
if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
nd->mnt = mntget(current->fs->altrootmnt); nd->mnt = mntget(current->fs->altrootmnt);
...@@ -827,20 +860,20 @@ int path_lookup(const char *name, unsigned int flags, struct nameidata *nd) ...@@ -827,20 +860,20 @@ int path_lookup(const char *name, unsigned int flags, struct nameidata *nd)
return 0; return 0;
read_lock(&current->fs->lock); read_lock(&current->fs->lock);
} }
spin_lock(&dcache_lock); /*to avoid cacheline bouncing with d_count*/ read_unlock(&current->fs->lock);
spin_lock(&dcache_lock);
nd->mnt = current->fs->rootmnt; nd->mnt = current->fs->rootmnt;
nd->dentry = current->fs->root; nd->dentry = current->fs->root;
read_unlock(&current->fs->lock);
} }
else{ else{
read_lock(&current->fs->lock);
spin_lock(&dcache_lock); spin_lock(&dcache_lock);
nd->mnt = current->fs->pwdmnt; nd->mnt = current->fs->pwdmnt;
nd->dentry = current->fs->pwd; nd->dentry = current->fs->pwd;
read_unlock(&current->fs->lock);
} }
nd->flags |= LOOKUP_LOCKED; nd->old_mnt = NULL;
return (path_walk(name, nd)); nd->old_dentry = NULL;
current->total_link_count = 0;
return link_path_walk(name, nd);
} }
/* /*
...@@ -2029,6 +2062,7 @@ __vfs_follow_link(struct nameidata *nd, const char *link) ...@@ -2029,6 +2062,7 @@ __vfs_follow_link(struct nameidata *nd, const char *link)
/* weird __emul_prefix() stuff did it */ /* weird __emul_prefix() stuff did it */
goto out; goto out;
} }
lock_nd(nd);
res = link_path_walk(link, nd); res = link_path_walk(link, nd);
out: out:
if (current->link_count || res || nd->last_type!=LAST_NORM) if (current->link_count || res || nd->last_type!=LAST_NORM)
......
...@@ -654,6 +654,8 @@ struct nameidata { ...@@ -654,6 +654,8 @@ struct nameidata {
struct qstr last; struct qstr last;
unsigned int flags; unsigned int flags;
int last_type; int last_type;
struct dentry *old_dentry;
struct vfsmount *old_mnt;
}; };
#define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */ #define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */
...@@ -1395,7 +1397,6 @@ extern ino_t find_inode_number(struct dentry *, struct qstr *); ...@@ -1395,7 +1397,6 @@ extern ino_t find_inode_number(struct dentry *, struct qstr *);
#define LOOKUP_CONTINUE (4) #define LOOKUP_CONTINUE (4)
#define LOOKUP_PARENT (16) #define LOOKUP_PARENT (16)
#define LOOKUP_NOALT (32) #define LOOKUP_NOALT (32)
#define LOOKUP_LOCKED (64)
/* /*
* Type of the last component on LOOKUP_PARENT * Type of the last component on LOOKUP_PARENT
......
...@@ -35,10 +35,12 @@ static inline void set_fs_root(struct fs_struct *fs, ...@@ -35,10 +35,12 @@ static inline void set_fs_root(struct fs_struct *fs,
struct dentry *old_root; struct dentry *old_root;
struct vfsmount *old_rootmnt; struct vfsmount *old_rootmnt;
write_lock(&fs->lock); write_lock(&fs->lock);
spin_lock(&dcache_lock);
old_root = fs->root; old_root = fs->root;
old_rootmnt = fs->rootmnt; old_rootmnt = fs->rootmnt;
fs->rootmnt = mntget(mnt); fs->rootmnt = mntget(mnt);
fs->root = dget(dentry); fs->root = dget(dentry);
spin_unlock(&dcache_lock);
write_unlock(&fs->lock); write_unlock(&fs->lock);
if (old_root) { if (old_root) {
dput(old_root); dput(old_root);
...@@ -58,10 +60,12 @@ static inline void set_fs_pwd(struct fs_struct *fs, ...@@ -58,10 +60,12 @@ static inline void set_fs_pwd(struct fs_struct *fs,
struct dentry *old_pwd; struct dentry *old_pwd;
struct vfsmount *old_pwdmnt; struct vfsmount *old_pwdmnt;
write_lock(&fs->lock); write_lock(&fs->lock);
spin_lock(&dcache_lock);
old_pwd = fs->pwd; old_pwd = fs->pwd;
old_pwdmnt = fs->pwdmnt; old_pwdmnt = fs->pwdmnt;
fs->pwdmnt = mntget(mnt); fs->pwdmnt = mntget(mnt);
fs->pwd = dget(dentry); fs->pwd = dget(dentry);
spin_unlock(&dcache_lock);
write_unlock(&fs->lock); write_unlock(&fs->lock);
if (old_pwd) { if (old_pwd) {
dput(old_pwd); dput(old_pwd);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment