Commit 5bb0bc7c authored by Trond Myklebust

NFSv4: Atomic open(). Fixes races w.r.t. opening files.

parent ab91d13d
......@@ -72,6 +72,26 @@ struct inode_operations nfs_dir_inode_operations = {
.setattr = nfs_setattr,
};
#ifdef CONFIG_NFS_V4
static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
/*
 * Directory inode operations table for NFSv4.
 *
 * .lookup is routed through nfs_atomic_lookup() (declared just above);
 * every other slot points at the nfs_* handler named on its line.
 */
struct inode_operations nfs4_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_atomic_lookup,
.link = nfs_link,
.unlink = nfs_unlink,
.symlink = nfs_symlink,
.mkdir = nfs_mkdir,
.rmdir = nfs_rmdir,
.mknod = nfs_mknod,
.rename = nfs_rename,
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
};
#endif /* CONFIG_NFS_V4 */
/*
* Open file
*/
......@@ -670,7 +690,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
goto out;
error = -ENOMEM;
dentry->d_op = &nfs_dentry_operations;
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
lock_kernel();
......@@ -702,6 +722,119 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
return ERR_PTR(error);
}
#ifdef CONFIG_NFS_V4
static int nfs_open_revalidate(struct dentry *, struct nameidata *);
/*
 * Dentry operations table for NFSv4.
 *
 * Revalidation goes through nfs_open_revalidate(); the delete and iput
 * hooks are nfs_dentry_delete() and nfs_dentry_iput().
 */
struct dentry_operations nfs4_dentry_operations = {
.d_revalidate = nfs_open_revalidate,
.d_delete = nfs_dentry_delete,
.d_iput = nfs_dentry_iput,
};
static int is_atomic_open(struct inode *dir, struct nameidata *nd)
{
if (!nd)
return 0;
/* Check that we are indeed trying to open this file */
if ((nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_OPEN))
return 0;
/* NFS does not (yet) have a stateful open for directories */
if (nd->flags & LOOKUP_DIRECTORY)
return 0;
/* Are we trying to write to a read only partition? */
if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
return 0;
return 1;
}
/*
 * ->lookup() for NFSv4 directories.
 *
 * When is_atomic_open() accepts the request, the name is opened on the
 * server through nfs4_atomic_open() and the result is attached to the
 * dentry with d_add().  When it does not, or when the open fails with
 * -ELOOP and O_NOFOLLOW was not requested, the request is handed to
 * nfs_lookup() instead.
 *
 * @dir:    directory being searched
 * @dentry: dentry for the name being looked up
 * @nd:     lookup context carrying the open intent
 *
 * Returns ERR_PTR(error) on the paths handled here, or whatever
 * nfs_lookup() returns on the fallback path.
 */
static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = NULL;
int error = 0;
/* Check that we are indeed trying to open this file */
if (!is_atomic_open(dir, nd))
goto no_open;
/* Reject names longer than the server's namelen limit */
if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
error = -ENAMETOOLONG;
goto out;
}
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
/* Let vfs_create() deal with O_EXCL */
/* inode is still NULL here, so no_entry adds a negative dentry */
if (nd->intent.open.flags & O_EXCL)
goto no_entry;
/* Open the file on the server */
lock_kernel();
inode = nfs4_atomic_open(dir, dentry, nd);
unlock_kernel();
if (IS_ERR(inode)) {
error = PTR_ERR(inode);
switch (error) {
/* Make a negative dentry */
/*
 * NOTE(review): error still holds -ENOENT after this break, so the
 * negative dentry is hashed below and ERR_PTR(-ENOENT) is returned
 * from out: -- confirm that this is the intended result.
 */
case -ENOENT:
inode = NULL;
break;
/* This turned out not to be a regular file */
/* Without O_NOFOLLOW, retry as an ordinary lookup */
case -ELOOP:
if (!(nd->intent.open.flags & O_NOFOLLOW))
goto no_open;
/* -ELOOP with O_NOFOLLOW falls through to the default error return */
/* case -EISDIR: */
/* case -EINVAL: */
default:
goto out;
}
}
no_entry:
d_add(dentry, inode);
nfs_renew_times(dentry);
out:
/* error must be zero or a negative errno at this point */
BUG_ON(error > 0);
return ERR_PTR(error);
no_open:
return nfs_lookup(dir, dentry, nd);
}
/*
 * ->d_revalidate() for NFSv4 dentries.
 *
 * If the lookup is an open that is_atomic_open() accepts, the dentry is
 * revalidated by calling nfs4_open_revalidate() with O_CREAT and O_TRUNC
 * masked out of the open flags.  In every other case the decision is left
 * to nfs_lookup_revalidate().
 *
 * @dentry: dentry to revalidate
 * @nd:     lookup context carrying the open intent
 *
 * Returns the value produced by nfs4_open_revalidate() or
 * nfs_lookup_revalidate(); on the open path a zero result also unhashes
 * the dentry with d_drop().
 */
static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
{
struct dentry *parent = NULL;
struct inode *inode = dentry->d_inode;
int openflags, ret = 0;
/* NFS only supports OPEN for regular files */
if (inode && !S_ISREG(inode->i_mode))
goto no_open;
/* Takes a reference on the parent; released by dput() on every exit path */
parent = dget_parent(dentry);
if (!is_atomic_open(parent->d_inode, nd))
goto no_open;
openflags = nd->intent.open.flags;
if (openflags & O_CREAT) {
/* If this is a negative dentry, just drop it */
/* ret is still 0, so out: calls d_drop() and returns 0 */
if (!inode)
goto out;
/* If this is exclusive open, just revalidate */
if (openflags & O_EXCL)
goto no_open;
}
/* We can't create new files, or truncate existing ones here */
openflags &= ~(O_CREAT|O_TRUNC);
lock_kernel();
ret = nfs4_open_revalidate(parent->d_inode, dentry, openflags);
unlock_kernel();
out:
dput(parent);
if (!ret)
d_drop(dentry);
return ret;
no_open:
/*
 * parent is still NULL when we arrive from the S_ISREG check above.
 * NOTE(review): this relies on dput() accepting a NULL dentry -- confirm.
 */
dput(parent);
return nfs_lookup_revalidate(dentry, nd);
}
#endif /* CONFIG_NFS_V4 */
static inline
int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry)
{
......@@ -1306,6 +1439,9 @@ nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
/* We only need to check permissions on file open() and access() */
if (!nd || !(nd->flags & (LOOKUP_OPEN|LOOKUP_ACCESS)))
return 0;
/* NFSv4 has atomic_open... */
if (NFS_PROTO(inode)->version > 3 && (nd->flags & LOOKUP_OPEN))
return 0;
}
lock_kernel();
......
......@@ -303,7 +303,6 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
server = NFS_SB(sb);
sb->s_magic = NFS_SUPER_MAGIC;
sb->s_op = &nfs_sops;
/* Did getting the root inode fail? */
if (nfs_get_root(&root_inode, authflavor, sb, &server->fh) < 0)
......@@ -312,7 +311,7 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
if (!sb->s_root)
goto out_no_root;
sb->s_root->d_op = &nfs_dentry_operations;
sb->s_root->d_op = server->rpc_ops->dentry_ops;
/* Get some general file system info */
if (server->rpc_ops->fsinfo(server, &server->fh, &fsinfo) < 0) {
......@@ -513,6 +512,7 @@ nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent)
goto out_shutdown;
}
sb->s_op = &nfs_sops;
err = nfs_sb_init(sb, authflavor);
if (err != 0)
goto out_noinit;
......@@ -745,7 +745,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_data.a_ops = &nfs_file_aops;
inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info;
} else if (S_ISDIR(inode->i_mode)) {
inode->i_op = &nfs_dir_inode_operations;
inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops;
inode->i_fop = &nfs_dir_operations;
if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
&& fattr->size <= NFS_LIMIT_READDIRPLUS)
......@@ -837,7 +837,12 @@ printk("nfs_setattr: revalidate failed, error=%d\n", error);
filemap_fdatawait(inode->i_mapping);
if (error)
goto out;
/* Optimize away unnecessary truncates */
if ((attr->ia_valid & ATTR_SIZE) && i_size_read(inode) == attr->ia_size)
attr->ia_valid &= ~ATTR_SIZE;
}
if (!attr->ia_valid)
goto out;
error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
if (error)
......@@ -1357,6 +1362,48 @@ static struct file_system_type nfs_fs_type = {
#ifdef CONFIG_NFS_V4
static void nfs4_clear_inode(struct inode *);
/*
 * Superblock operations table for NFSv4 mounts.
 *
 * .clear_inode points at nfs4_clear_inode() (declared just above); every
 * other slot points at the nfs_* handler named on its line.
 */
static struct super_operations nfs4_sops = {
.alloc_inode = nfs_alloc_inode,
.destroy_inode = nfs_destroy_inode,
.write_inode = nfs_write_inode,
.delete_inode = nfs_delete_inode,
.put_super = nfs_put_super,
.statfs = nfs_statfs,
.clear_inode = nfs4_clear_inode,
.umount_begin = nfs_umount_begin,
.show_options = nfs_show_options,
};
/*
 * clear_inode() hook for NFSv4.
 *
 * An open() may get through nfs_atomic_lookup() and then fail before it
 * reaches nfs_open(), leaving NFSv4 open state attached to the inode.
 * Release every such leftover state, then run the common NFS
 * clear_inode() code.
 */
static void nfs4_clear_inode(struct inode *inode)
{
	struct nfs_inode *ni = NFS_I(inode);
	struct nfs4_state *stale;

	while (1) {
		if (list_empty(&ni->open_states))
			break;

		/* Always take the entry at the head of the list */
		stale = list_entry(ni->open_states.next,
				struct nfs4_state,
				inode_states);
		dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n",
				__FUNCTION__,
				inode->i_sb->s_id,
				(long long)NFS_FILEID(inode),
				stale);

		/* Unlink it before dropping the reference */
		list_del(&stale->inode_states);
		nfs4_put_open_state(stale);
	}

	/* Hand over to the standard NFS clear_inode() code */
	nfs_clear_inode(inode);
}
static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent)
{
struct nfs_server *server;
......@@ -1481,6 +1528,7 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
if ((server->idmap = nfs_idmap_new(server)) == NULL)
printk(KERN_WARNING "NFS: couldn't start IDmap\n");
sb->s_op = &nfs4_sops;
err = nfs_sb_init(sb, authflavour);
if (err == 0)
return 0;
......
......@@ -898,6 +898,8 @@ nfs3_request_compatible(struct nfs_page *req, struct file *filp, struct page *pa
struct nfs_rpc_ops nfs_v3_clientops = {
.version = 3, /* protocol version */
.dentry_ops = &nfs_dentry_operations,
.dir_inode_ops = &nfs_dir_inode_operations,
.getroot = nfs3_proc_get_root,
.getattr = nfs3_proc_getattr,
.setattr = nfs3_proc_setattr,
......
......@@ -45,6 +45,7 @@
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/smp_lock.h>
#include <linux/namei.h>
#define NFSDBG_FACILITY NFSDBG_PROC
......@@ -509,6 +510,9 @@ nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state)
return status;
}
/*
* Returns an nfs4_state + a referenced inode
*/
struct nfs4_state *
nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred)
{
......@@ -617,19 +621,23 @@ nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *satt
up(&sp->so_sema);
nfs4_put_state_owner(sp);
iput(inode);
return state;
out_up:
up(&sp->so_sema);
nfs4_put_state_owner(sp);
if (state)
if (state) {
nfs4_put_open_state(state);
if (inode)
state = NULL;
}
if (inode) {
iput(inode);
inode = NULL;
}
status = nfs4_handle_error(server, status);
if (!status)
goto retry;
BUG_ON(status < -1000 || status > 0);
out:
return ERR_PTR(status);
}
......@@ -718,6 +726,56 @@ nfs4_do_close(struct inode *inode, struct nfs4_state *state)
return status;
}
struct inode *
nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
struct iattr attr;
struct rpc_cred *cred;
struct nfs4_state *state;
if (nd->flags & LOOKUP_CREATE) {
attr.ia_mode = nd->intent.open.create_mode;
attr.ia_valid = ATTR_MODE;
if (!IS_POSIXACL(dir))
attr.ia_mode &= ~current->fs->umask;
} else {
attr.ia_valid = 0;
BUG_ON(nd->intent.open.flags & O_CREAT);
}
cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred);
put_rpccred(cred);
if (IS_ERR(state))
return (struct inode *)state;
return state->inode;
}
/*
 * Revalidate @dentry by opening its name in @dir with @openflags.
 *
 * Returns 1 when the dentry is still good: either the open produced the
 * inode the dentry already points at, or the open failed with -ENOENT and
 * the dentry is negative.  Returns 0 in every other case; if the open
 * produced a different inode, the dentry is also unhashed and the new
 * open state is released.
 */
int
nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags)
{
	struct nfs4_state *st;
	struct rpc_cred *cred;
	struct inode *opened;

	cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
	st = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred);
	put_rpccred(cred);

	if (IS_ERR(st)) {
		/* "No such file" agrees with a negative dentry */
		if (st == ERR_PTR(-ENOENT) && !dentry->d_inode)
			return 1;
		return 0;
	}

	opened = st->inode;
	if (opened != dentry->d_inode) {
		/* The name now refers to a different inode */
		d_drop(dentry);
		nfs4_put_open_state(st);
		iput(opened);
		return 0;
	}

	/* Same inode: keep the open state, release only the inode */
	iput(opened);
	return 1;
}
static int
nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
struct nfs_fattr *fattr)
......@@ -808,28 +866,39 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
struct inode * inode = dentry->d_inode;
int size_change = sattr->ia_valid & ATTR_SIZE;
struct nfs4_state *state = NULL;
int status;
int need_iput = 0;
int status;
fattr->valid = 0;
if (size_change) {
struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
state = nfs4_do_open(dentry->d_parent->d_inode,
state = nfs4_find_state_bypid(inode, current->pid);
if (!state) {
struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
state = nfs4_do_open(dentry->d_parent->d_inode,
&dentry->d_name, FMODE_WRITE, NULL, cred);
put_rpccred(cred);
put_rpccred(cred);
need_iput = 1;
}
if (IS_ERR(state))
return PTR_ERR(state);
if (state->inode != inode) {
printk(KERN_WARNING "nfs: raced in setattr, returning -EIO\n");
nfs4_put_open_state(state);
return -EIO;
printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode);
status = -EIO;
goto out;
}
}
status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
NFS_FH(inode), sattr, state);
if (state)
out:
if (state) {
inode = state->inode;
nfs4_put_open_state(state);
if (need_iput)
iput(inode);
}
return status;
}
......@@ -1085,18 +1154,18 @@ nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
state = nfs4_do_open(dir, name, flags, sattr, cred);
put_rpccred(cred);
if (!IS_ERR(state)) {
inode = igrab(state->inode);
inode = state->inode;
if (flags & O_EXCL) {
struct nfs_fattr fattr;
int status;
status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
NFS_FH(inode), sattr, state);
if (status != 0) {
nfs4_put_open_state(state);
iput(inode);
inode = ERR_PTR(status);
}
}
nfs4_put_open_state(state);
} else
inode = (struct inode *)state;
return inode;
......@@ -1672,43 +1741,28 @@ static int
nfs4_proc_file_open(struct inode *inode, struct file *filp)
{
struct dentry *dentry = filp->f_dentry;
struct inode *dir = dentry->d_parent->d_inode;
struct rpc_cred *cred;
struct nfs4_state *state;
int flags = filp->f_flags;
int status = 0;
dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n",
(int)dentry->d_parent->d_name.len,
dentry->d_parent->d_name.name,
(int)dentry->d_name.len, dentry->d_name.name);
if ((flags + 1) & O_ACCMODE)
flags++;
lock_kernel();
/*
* We have already opened the file "O_EXCL" in nfs4_proc_create!!
* This ugliness will go away with lookup-intent...
*/
cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
state = nfs4_do_open(dir, &dentry->d_name, flags, NULL, cred);
if (IS_ERR(state)) {
status = PTR_ERR(state);
state = NULL;
} else if (filp->f_mode & FMODE_WRITE)
nfs_set_mmcred(inode, cred);
if (inode != filp->f_dentry->d_inode) {
/* Find our open stateid */
state = nfs4_find_state_bypid(inode, current->pid);
if (state == NULL) {
printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__);
status = -EIO; /* ERACE actually */
nfs4_put_open_state(state);
state = NULL;
return -EIO; /* ERACE actually */
}
nfs4_put_open_state(state);
if (filp->f_mode & FMODE_WRITE) {
lock_kernel();
nfs_set_mmcred(inode, state->owner->so_cred);
unlock_kernel();
}
filp->private_data = state;
put_rpccred(cred);
unlock_kernel();
return status;
return 0;
}
/*
......@@ -1922,6 +1976,8 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp)
struct nfs_rpc_ops nfs_v4_clientops = {
.version = 4, /* protocol version */
.dentry_ops = &nfs4_dentry_operations,
.dir_inode_ops = &nfs4_dir_inode_operations,
.getroot = nfs4_proc_get_root,
.getattr = nfs4_proc_getattr,
.setattr = nfs4_proc_setattr,
......
......@@ -349,7 +349,6 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
atomic_inc(&owner->so_count);
list_add(&state->inode_states, &nfsi->open_states);
state->inode = inode;
atomic_inc(&inode->i_count);
spin_unlock(&inode->i_lock);
} else {
spin_unlock(&inode->i_lock);
......@@ -384,7 +383,6 @@ nfs4_put_open_state(struct nfs4_state *state)
} while (!status);
}
up(&owner->so_sema);
iput(inode);
nfs4_free_open_state(state);
nfs4_put_state_owner(owner);
}
......
......@@ -656,6 +656,8 @@ nfs_request_compatible(struct nfs_page *req, struct file *filp, struct page *pag
struct nfs_rpc_ops nfs_v2_clientops = {
.version = 2, /* protocol version */
.dentry_ops = &nfs_dentry_operations,
.dir_inode_ops = &nfs_dir_inode_operations,
.getroot = nfs_proc_get_root,
.getattr = nfs_proc_getattr,
.setattr = nfs_proc_setattr,
......
......@@ -558,6 +558,9 @@ struct nfs4_state {
};
extern struct dentry_operations nfs4_dentry_operations;
extern struct inode_operations nfs4_dir_inode_operations;
/* nfs4proc.c */
extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short);
extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
......@@ -566,6 +569,8 @@ extern int nfs4_proc_async_renew(struct nfs4_client *);
extern int nfs4_proc_renew(struct nfs4_client *);
extern int nfs4_do_close(struct inode *, struct nfs4_state *);
extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *);
extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
/* nfs4renewd.c */
extern void nfs4_schedule_state_renewal(struct nfs4_client *);
......@@ -581,6 +586,7 @@ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struc
extern void nfs4_put_state_owner(struct nfs4_state_owner *);
extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
extern void nfs4_put_open_state(struct nfs4_state *);
extern struct nfs4_state *nfs4_find_state_bypid(struct inode *, pid_t);
extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
extern int nfs4_handle_error(struct nfs_server *, int);
extern void nfs4_schedule_state_recovery(struct nfs4_client *);
......
......@@ -637,6 +637,8 @@ struct nfs_page;
*/
struct nfs_rpc_ops {
int version; /* Protocol version */
struct dentry_operations *dentry_ops;
struct inode_operations *dir_inode_ops;
int (*getroot) (struct nfs_server *, struct nfs_fh *,
struct nfs_fattr *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment