Commit 2105c282 authored by David Howells's avatar David Howells

afs: Fix race between post-modification dir edit and readdir/d_revalidate

AFS directories are retained locally as a structured file, with lookup
being effected by a local search of the file contents.  When a modification
(such as mkdir) happens, the dir file content is modified locally rather
than redownloading the directory.

The directory contents are accessed in a number of ways, with a number of
different locks schemes:

 (1) Download of contents - dvnode->validate_lock/write in afs_read_dir().

 (2) Lookup and readdir - dvnode->validate_lock/read in afs_dir_iterate(),
     downgrading from (1) if necessary.

 (3) d_revalidate of child dentry - dvnode->validate_lock/read in
     afs_do_lookup_one() downgrading from (1) if necessary.

 (4) Edit of dir after modification - page locks on individual dir pages.

Unfortunately, because (4) uses different locking scheme to (1) - (3),
nothing protects against the page being scanned whilst the edit is
underway.  Even download is not safe as it doesn't lock the pages - relying
instead on the validate_lock to serialise as a whole (the theory being that
directory contents are treated as a block and always downloaded as a
block).

Fix this by write-locking dvnode->validate_lock around the edits.  Care
must be taken in the rename case as there may be two different dirs - but
they need not be locked at the same time.  In any case, once the lock is
taken, the directory version must be rechecked, and the edit skipped if a
later version has been downloaded by revalidation (there can't have been
any local changes because the VFS holds the inode lock, but there can have
been remote changes).

Fixes: 63a4681f ("afs: Locally edit directory data for mkdir/create/unlink/...")
Signed-off-by: default avatarDavid Howells <dhowells@redhat.com>
parent 3efe55b0
...@@ -1275,6 +1275,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ...@@ -1275,6 +1275,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct afs_fs_cursor fc; struct afs_fs_cursor fc;
struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_vnode *dvnode = AFS_FS_I(dir);
struct key *key; struct key *key;
afs_dataversion_t data_version;
int ret; int ret;
mode |= S_IFDIR; mode |= S_IFDIR;
...@@ -1295,7 +1296,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ...@@ -1295,7 +1296,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
ret = -ERESTARTSYS; ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
afs_dataversion_t data_version = dvnode->status.data_version + 1; data_version = dvnode->status.data_version + 1;
while (afs_select_fileserver(&fc)) { while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode); fc.cb_break = afs_calc_vnode_cb_break(dvnode);
...@@ -1316,10 +1317,14 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ...@@ -1316,10 +1317,14 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto error_key; goto error_key;
} }
if (ret == 0 && if (ret == 0) {
test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) down_write(&dvnode->validate_lock);
afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
afs_edit_dir_for_create); dvnode->status.data_version == data_version)
afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
afs_edit_dir_for_create);
up_write(&dvnode->validate_lock);
}
key_put(key); key_put(key);
kfree(scb); kfree(scb);
...@@ -1360,6 +1365,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) ...@@ -1360,6 +1365,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
struct afs_fs_cursor fc; struct afs_fs_cursor fc;
struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL; struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode = NULL;
struct key *key; struct key *key;
afs_dataversion_t data_version;
int ret; int ret;
_enter("{%llx:%llu},{%pd}", _enter("{%llx:%llu},{%pd}",
...@@ -1391,7 +1397,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) ...@@ -1391,7 +1397,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
ret = -ERESTARTSYS; ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
afs_dataversion_t data_version = dvnode->status.data_version + 1; data_version = dvnode->status.data_version + 1;
while (afs_select_fileserver(&fc)) { while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode); fc.cb_break = afs_calc_vnode_cb_break(dvnode);
...@@ -1404,9 +1410,12 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) ...@@ -1404,9 +1410,12 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
ret = afs_end_vnode_operation(&fc); ret = afs_end_vnode_operation(&fc);
if (ret == 0) { if (ret == 0) {
afs_dir_remove_subdir(dentry); afs_dir_remove_subdir(dentry);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == data_version)
afs_edit_dir_remove(dvnode, &dentry->d_name, afs_edit_dir_remove(dvnode, &dentry->d_name,
afs_edit_dir_for_rmdir); afs_edit_dir_for_rmdir);
up_write(&dvnode->validate_lock);
} }
} }
...@@ -1544,10 +1553,15 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) ...@@ -1544,10 +1553,15 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
ret = afs_end_vnode_operation(&fc); ret = afs_end_vnode_operation(&fc);
if (ret == 0 && !(scb[1].have_status || scb[1].have_error)) if (ret == 0 && !(scb[1].have_status || scb[1].have_error))
ret = afs_dir_remove_link(dvnode, dentry, key); ret = afs_dir_remove_link(dvnode, dentry, key);
if (ret == 0 &&
test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) if (ret == 0) {
afs_edit_dir_remove(dvnode, &dentry->d_name, down_write(&dvnode->validate_lock);
afs_edit_dir_for_unlink); if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == data_version)
afs_edit_dir_remove(dvnode, &dentry->d_name,
afs_edit_dir_for_unlink);
up_write(&dvnode->validate_lock);
}
} }
if (need_rehash && ret < 0 && ret != -ENOENT) if (need_rehash && ret < 0 && ret != -ENOENT)
...@@ -1573,6 +1587,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, ...@@ -1573,6 +1587,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
struct afs_status_cb *scb; struct afs_status_cb *scb;
struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_vnode *dvnode = AFS_FS_I(dir);
struct key *key; struct key *key;
afs_dataversion_t data_version;
int ret; int ret;
mode |= S_IFREG; mode |= S_IFREG;
...@@ -1597,7 +1612,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, ...@@ -1597,7 +1612,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
ret = -ERESTARTSYS; ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
afs_dataversion_t data_version = dvnode->status.data_version + 1; data_version = dvnode->status.data_version + 1;
while (afs_select_fileserver(&fc)) { while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode); fc.cb_break = afs_calc_vnode_cb_break(dvnode);
...@@ -1618,9 +1633,12 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, ...@@ -1618,9 +1633,12 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
goto error_key; goto error_key;
} }
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == data_version)
afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
afs_edit_dir_for_create); afs_edit_dir_for_create);
up_write(&dvnode->validate_lock);
kfree(scb); kfree(scb);
key_put(key); key_put(key);
...@@ -1648,6 +1666,7 @@ static int afs_link(struct dentry *from, struct inode *dir, ...@@ -1648,6 +1666,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_vnode *dvnode = AFS_FS_I(dir);
struct afs_vnode *vnode = AFS_FS_I(d_inode(from)); struct afs_vnode *vnode = AFS_FS_I(d_inode(from));
struct key *key; struct key *key;
afs_dataversion_t data_version;
int ret; int ret;
_enter("{%llx:%llu},{%llx:%llu},{%pd}", _enter("{%llx:%llu},{%llx:%llu},{%pd}",
...@@ -1672,7 +1691,7 @@ static int afs_link(struct dentry *from, struct inode *dir, ...@@ -1672,7 +1691,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
ret = -ERESTARTSYS; ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
afs_dataversion_t data_version = dvnode->status.data_version + 1; data_version = dvnode->status.data_version + 1;
if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) { if (mutex_lock_interruptible_nested(&vnode->io_lock, 1) < 0) {
afs_end_vnode_operation(&fc); afs_end_vnode_operation(&fc);
...@@ -1702,9 +1721,12 @@ static int afs_link(struct dentry *from, struct inode *dir, ...@@ -1702,9 +1721,12 @@ static int afs_link(struct dentry *from, struct inode *dir,
goto error_key; goto error_key;
} }
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == data_version)
afs_edit_dir_add(dvnode, &dentry->d_name, &vnode->fid, afs_edit_dir_add(dvnode, &dentry->d_name, &vnode->fid,
afs_edit_dir_for_link); afs_edit_dir_for_link);
up_write(&dvnode->validate_lock);
key_put(key); key_put(key);
kfree(scb); kfree(scb);
...@@ -1732,6 +1754,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, ...@@ -1732,6 +1754,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
struct afs_status_cb *scb; struct afs_status_cb *scb;
struct afs_vnode *dvnode = AFS_FS_I(dir); struct afs_vnode *dvnode = AFS_FS_I(dir);
struct key *key; struct key *key;
afs_dataversion_t data_version;
int ret; int ret;
_enter("{%llx:%llu},{%pd},%s", _enter("{%llx:%llu},{%pd},%s",
...@@ -1759,7 +1782,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, ...@@ -1759,7 +1782,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
ret = -ERESTARTSYS; ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
afs_dataversion_t data_version = dvnode->status.data_version + 1; data_version = dvnode->status.data_version + 1;
while (afs_select_fileserver(&fc)) { while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode); fc.cb_break = afs_calc_vnode_cb_break(dvnode);
...@@ -1780,9 +1803,12 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, ...@@ -1780,9 +1803,12 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
goto error_key; goto error_key;
} }
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == data_version)
afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid, afs_edit_dir_add(dvnode, &dentry->d_name, &iget_data.fid,
afs_edit_dir_for_symlink); afs_edit_dir_for_symlink);
up_write(&dvnode->validate_lock);
key_put(key); key_put(key);
kfree(scb); kfree(scb);
...@@ -1812,6 +1838,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -1812,6 +1838,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct dentry *tmp = NULL, *rehash = NULL; struct dentry *tmp = NULL, *rehash = NULL;
struct inode *new_inode; struct inode *new_inode;
struct key *key; struct key *key;
afs_dataversion_t orig_data_version;
afs_dataversion_t new_data_version;
bool new_negative = d_is_negative(new_dentry); bool new_negative = d_is_negative(new_dentry);
int ret; int ret;
...@@ -1890,9 +1918,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -1890,9 +1918,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
ret = -ERESTARTSYS; ret = -ERESTARTSYS;
if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) { if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) {
afs_dataversion_t orig_data_version;
afs_dataversion_t new_data_version;
orig_data_version = orig_dvnode->status.data_version + 1; orig_data_version = orig_dvnode->status.data_version + 1;
if (orig_dvnode != new_dvnode) { if (orig_dvnode != new_dvnode) {
...@@ -1928,18 +1953,25 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -1928,18 +1953,25 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (ret == 0) { if (ret == 0) {
if (rehash) if (rehash)
d_rehash(rehash); d_rehash(rehash);
if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags)) down_write(&orig_dvnode->validate_lock);
afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name, if (test_bit(AFS_VNODE_DIR_VALID, &orig_dvnode->flags) &&
afs_edit_dir_for_rename_0); orig_dvnode->status.data_version == orig_data_version)
afs_edit_dir_remove(orig_dvnode, &old_dentry->d_name,
afs_edit_dir_for_rename_0);
if (orig_dvnode != new_dvnode) {
up_write(&orig_dvnode->validate_lock);
if (!new_negative && down_write(&new_dvnode->validate_lock);
test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags)) }
afs_edit_dir_remove(new_dvnode, &new_dentry->d_name, if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags) &&
afs_edit_dir_for_rename_1); orig_dvnode->status.data_version == new_data_version) {
if (!new_negative)
afs_edit_dir_remove(new_dvnode, &new_dentry->d_name,
afs_edit_dir_for_rename_1);
if (test_bit(AFS_VNODE_DIR_VALID, &new_dvnode->flags))
afs_edit_dir_add(new_dvnode, &new_dentry->d_name, afs_edit_dir_add(new_dvnode, &new_dentry->d_name,
&vnode->fid, afs_edit_dir_for_rename_2); &vnode->fid, afs_edit_dir_for_rename_2);
}
new_inode = d_inode(new_dentry); new_inode = d_inode(new_dentry);
if (new_inode) { if (new_inode) {
...@@ -1958,6 +1990,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -1958,6 +1990,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
afs_update_dentry_version(&fc, old_dentry, &scb[1]); afs_update_dentry_version(&fc, old_dentry, &scb[1]);
afs_update_dentry_version(&fc, new_dentry, &scb[1]); afs_update_dentry_version(&fc, new_dentry, &scb[1]);
d_move(old_dentry, new_dentry); d_move(old_dentry, new_dentry);
up_write(&new_dvnode->validate_lock);
goto error_tmp; goto error_tmp;
} }
......
...@@ -21,6 +21,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode ...@@ -21,6 +21,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
{ {
struct afs_fs_cursor fc; struct afs_fs_cursor fc;
struct afs_status_cb *scb; struct afs_status_cb *scb;
afs_dataversion_t dir_data_version;
int ret = -ERESTARTSYS; int ret = -ERESTARTSYS;
_enter("%pd,%pd", old, new); _enter("%pd,%pd", old, new);
...@@ -31,7 +32,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode ...@@ -31,7 +32,7 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
trace_afs_silly_rename(vnode, false); trace_afs_silly_rename(vnode, false);
if (afs_begin_vnode_operation(&fc, dvnode, key, true)) { if (afs_begin_vnode_operation(&fc, dvnode, key, true)) {
afs_dataversion_t dir_data_version = dvnode->status.data_version + 1; dir_data_version = dvnode->status.data_version + 1;
while (afs_select_fileserver(&fc)) { while (afs_select_fileserver(&fc)) {
fc.cb_break = afs_calc_vnode_cb_break(dvnode); fc.cb_break = afs_calc_vnode_cb_break(dvnode);
...@@ -54,12 +55,15 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode ...@@ -54,12 +55,15 @@ static int afs_do_silly_rename(struct afs_vnode *dvnode, struct afs_vnode *vnode
dvnode->silly_key = key_get(key); dvnode->silly_key = key_get(key);
} }
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) down_write(&dvnode->validate_lock);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
dvnode->status.data_version == dir_data_version) {
afs_edit_dir_remove(dvnode, &old->d_name, afs_edit_dir_remove(dvnode, &old->d_name,
afs_edit_dir_for_silly_0); afs_edit_dir_for_silly_0);
if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
afs_edit_dir_add(dvnode, &new->d_name, afs_edit_dir_add(dvnode, &new->d_name,
&vnode->fid, afs_edit_dir_for_silly_1); &vnode->fid, afs_edit_dir_for_silly_1);
}
up_write(&dvnode->validate_lock);
} }
kfree(scb); kfree(scb);
...@@ -181,10 +185,14 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode ...@@ -181,10 +185,14 @@ static int afs_do_silly_unlink(struct afs_vnode *dvnode, struct afs_vnode *vnode
clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
} }
} }
if (ret == 0 && if (ret == 0) {
test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) down_write(&dvnode->validate_lock);
afs_edit_dir_remove(dvnode, &dentry->d_name, if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags) &&
afs_edit_dir_for_unlink); dvnode->status.data_version == dir_data_version)
afs_edit_dir_remove(dvnode, &dentry->d_name,
afs_edit_dir_for_unlink);
up_write(&dvnode->validate_lock);
}
} }
kfree(scb); kfree(scb);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment