Commit a8b00268 authored by Al Viro's avatar Al Viro

rename(): avoid a deadlock in the case of parents having no common ancestor

... and fix the directory locking documentation and proof of correctness.
Holding ->s_vfs_rename_mutex *almost* prevents ->d_parent changes; the
case where we really don't want it is splicing the root of disconnected
tree to somewhere.

In other words, ->s_vfs_rename_mutex is sufficient to stabilize "X is an
ancestor of Y" only if X and Y are already in the same tree.  Otherwise
it can go from false to true, and one can construct a deadlock on that.

Make lock_two_directories() report an error in such case and update the
callers of lock_rename()/lock_rename_child() to handle such errors.

And yes, such conditions are not impossible to create ;-/
Reviewed-by: default avatarJan Kara <jack@suse.cz>
Signed-off-by: default avatarAl Viro <viro@zeniv.linux.org.uk>
parent dbd4540d
......@@ -1079,3 +1079,12 @@ On same-directory ->rename() the (tautological) update of .. is not protected
by any locks; just don't do it if the old parent is the same as the new one.
We really can't lock two subdirectories in same-directory rename - not without
deadlocks.
---
**mandatory**
lock_rename() and lock_rename_child() may fail in cross-directory case, if
their arguments do not have a common ancestor. In that case ERR_PTR(-EXDEV)
is returned, with no locks taken. In-tree users updated; out-of-tree ones
would need to do so.
......@@ -305,6 +305,8 @@ int cachefiles_bury_object(struct cachefiles_cache *cache,
/* do the multiway lock magic */
trap = lock_rename(cache->graveyard, dir);
if (IS_ERR(trap))
return PTR_ERR(trap);
/* do some checks before getting the grave dentry */
if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) {
......
......@@ -599,6 +599,8 @@ ecryptfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
target_inode = d_inode(new_dentry);
trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
if (IS_ERR(trap))
return PTR_ERR(trap);
dget(lower_new_dentry);
rc = -EINVAL;
if (lower_old_dentry->d_parent != lower_old_dir_dentry)
......
......@@ -3014,21 +3014,37 @@ static inline int may_create(struct mnt_idmap *idmap,
return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC);
}
// p1 != p2, both are on the same filesystem, ->s_vfs_rename_mutex is held
static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2)
{
struct dentry *p;
struct dentry *p = p1, *q = p2, *r;
p = d_ancestor(p2, p1);
if (p) {
while ((r = p->d_parent) != p2 && r != p)
p = r;
if (r == p2) {
// p is a child of p2 and an ancestor of p1 or p1 itself
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2);
return p;
}
p = d_ancestor(p1, p2);
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
return p;
// p is the root of connected component that contains p1
// p2 does not occur on the path from p to p1
while ((r = q->d_parent) != p1 && r != p && r != q)
q = r;
if (r == p1) {
// q is a child of p1 and an ancestor of p2 or p2 itself
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
return q;
} else if (likely(r == p)) {
// both p2 and p1 are descendents of p
inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
return NULL;
} else { // no common ancestor at the time we'd been called
mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
return ERR_PTR(-EXDEV);
}
}
/*
......@@ -4947,6 +4963,10 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
retry_deleg:
trap = lock_rename(new_path.dentry, old_path.dentry);
if (IS_ERR(trap)) {
error = PTR_ERR(trap);
goto exit_lock_rename;
}
old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry,
lookup_flags);
......@@ -5014,6 +5034,7 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
dput(old_dentry);
exit3:
unlock_rename(new_path.dentry, old_path.dentry);
exit_lock_rename:
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error)
......
......@@ -1813,6 +1813,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
}
trap = lock_rename(tdentry, fdentry);
if (IS_ERR(trap)) {
err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
goto out;
}
err = fh_fill_pre_attrs(ffhp);
if (err != nfs_ok)
goto out_unlock;
......
......@@ -722,7 +722,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
struct inode *inode;
struct inode *udir = d_inode(c->destdir), *wdir = d_inode(c->workdir);
struct path path = { .mnt = ovl_upper_mnt(ofs) };
struct dentry *temp, *upper;
struct dentry *temp, *upper, *trap;
struct ovl_cu_creds cc;
int err;
struct ovl_cattr cattr = {
......@@ -760,9 +760,11 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
* If temp was moved, abort without the cleanup.
*/
ovl_start_write(c->dentry);
if (lock_rename(c->workdir, c->destdir) != NULL ||
temp->d_parent != c->workdir) {
trap = lock_rename(c->workdir, c->destdir);
if (trap || temp->d_parent != c->workdir) {
err = -EIO;
if (IS_ERR(trap))
goto out;
goto unlock;
} else if (err) {
goto cleanup;
......@@ -803,6 +805,7 @@ static int ovl_copy_up_workdir(struct ovl_copy_up_ctx *c)
ovl_set_flag(OVL_WHITEOUTS, inode);
unlock:
unlock_rename(c->workdir, c->destdir);
out:
ovl_end_write(c->dentry);
return err;
......
......@@ -1180,6 +1180,10 @@ static int ovl_rename(struct mnt_idmap *idmap, struct inode *olddir,
}
trap = lock_rename(new_upperdir, old_upperdir);
if (IS_ERR(trap)) {
err = PTR_ERR(trap);
goto out_revert_creds;
}
olddentry = ovl_lookup_upper(ofs, old->d_name.name, old_upperdir,
old->d_name.len);
......
......@@ -439,8 +439,10 @@ static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
bool ok = false;
if (workdir != upperdir) {
ok = (lock_rename(workdir, upperdir) == NULL);
unlock_rename(workdir, upperdir);
struct dentry *trap = lock_rename(workdir, upperdir);
if (!IS_ERR(trap))
unlock_rename(workdir, upperdir);
ok = (trap == NULL);
}
return ok;
}
......
......@@ -1198,12 +1198,17 @@ void ovl_nlink_end(struct dentry *dentry)
int ovl_lock_rename_workdir(struct dentry *workdir, struct dentry *upperdir)
{
struct dentry *trap;
/* Workdir should not be the same as upperdir */
if (workdir == upperdir)
goto err;
/* Workdir should not be subdir of upperdir and vice versa */
if (lock_rename(workdir, upperdir) != NULL)
trap = lock_rename(workdir, upperdir);
if (IS_ERR(trap))
goto err;
if (trap)
goto err_unlock;
return 0;
......
......@@ -708,6 +708,10 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
goto out2;
trap = lock_rename_child(old_child, new_path.dentry);
if (IS_ERR(trap)) {
err = PTR_ERR(trap);
goto out_drop_write;
}
old_parent = dget(old_child->d_parent);
if (d_unhashed(old_child)) {
......@@ -770,6 +774,7 @@ int ksmbd_vfs_rename(struct ksmbd_work *work, const struct path *old_path,
out3:
dput(old_parent);
unlock_rename(old_parent, new_path.dentry);
out_drop_write:
mnt_drop_write(old_path->mnt);
out2:
path_put(&new_path);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment