Commit ca486f40 authored by Trond Myklebust's avatar Trond Myklebust

Merge bk://nfsclient.bkbits.net/linux-2.6

into fys.uio.no:/home/linux/bitkeeper/nfsclient-2.6
parents e8323593 9e84df77
...@@ -783,6 +783,54 @@ void d_instantiate(struct dentry *entry, struct inode * inode) ...@@ -783,6 +783,54 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
security_d_instantiate(entry, inode); security_d_instantiate(entry, inode);
} }
/**
* d_instantiate_unique - instantiate a non-aliased dentry
* @entry: dentry to instantiate
* @inode: inode to attach to this dentry
*
* Fill in inode information in the entry. On success, it returns NULL.
* If an unhashed alias of "entry" already exists, then we return the
* aliased dentry instead.
*
* Note that in order to avoid conflicts with rename() etc, the caller
* had better be holding the parent directory semaphore.
*/
struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
struct dentry *alias;
/* Cache the name components so each list pass compares cheaply. */
int len = entry->d_name.len;
const char *name = entry->d_name.name;
unsigned int hash = entry->d_name.hash;
/* entry must not already be attached to any inode. */
BUG_ON(!list_empty(&entry->d_alias));
spin_lock(&dcache_lock);
/* NULL inode: instantiate a negative dentry; no alias search needed. */
if (!inode)
goto do_negative;
/* Walk the inode's existing dentries looking for one with the same
 * parent and name as "entry" (cheap checks first: hash, parent, len). */
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
struct qstr *qstr = &alias->d_name;
if (qstr->hash != hash)
continue;
if (alias->d_parent != entry->d_parent)
continue;
if (qstr->len != len)
continue;
if (memcmp(qstr->name, name, len))
continue;
/* Found a match: pin it while dcache_lock is still held, then
 * return it instead of instantiating "entry". */
dget_locked(alias);
spin_unlock(&dcache_lock);
/* Per the contract above, only an unhashed alias may exist here. */
BUG_ON(!d_unhashed(alias));
return alias;
}
/* No alias found: attach "entry" to the inode's alias list. */
list_add(&entry->d_alias, &inode->i_dentry);
do_negative:
entry->d_inode = inode;
spin_unlock(&dcache_lock);
/* Security hook runs outside dcache_lock, matching d_instantiate(). */
security_d_instantiate(entry, inode);
return NULL;
}
EXPORT_SYMBOL(d_instantiate_unique);
/** /**
* d_alloc_root - allocate root dentry * d_alloc_root - allocate root dentry
* @root_inode: inode to allocate the root for * @root_inode: inode to allocate the root for
......
...@@ -1563,9 +1563,6 @@ int fcntl_getlk(struct file *filp, struct flock __user *l) ...@@ -1563,9 +1563,6 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)
error = filp->f_op->lock(filp, F_GETLK, &file_lock); error = filp->f_op->lock(filp, F_GETLK, &file_lock);
if (error < 0) if (error < 0)
goto out; goto out;
else if (error == LOCK_USE_CLNT)
/* Bypass for NFS with no locking - 2.0.36 compat */
fl = posix_test_lock(filp, &file_lock);
else else
fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
} else { } else {
...@@ -1708,9 +1705,6 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l) ...@@ -1708,9 +1705,6 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
error = filp->f_op->lock(filp, F_GETLK, &file_lock); error = filp->f_op->lock(filp, F_GETLK, &file_lock);
if (error < 0) if (error < 0)
goto out; goto out;
else if (error == LOCK_USE_CLNT)
/* Bypass for NFS with no locking - 2.0.36 compat */
fl = posix_test_lock(filp, &file_lock);
else else
fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock); fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
} else { } else {
......
...@@ -40,8 +40,6 @@ ...@@ -40,8 +40,6 @@
static int nfs_opendir(struct inode *, struct file *); static int nfs_opendir(struct inode *, struct file *);
static int nfs_readdir(struct file *, void *, filldir_t); static int nfs_readdir(struct file *, void *, filldir_t);
static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
static int nfs_cached_lookup(struct inode *, struct dentry *,
struct nfs_fh *, struct nfs_fattr *);
static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *); static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
static int nfs_mkdir(struct inode *, struct dentry *, int); static int nfs_mkdir(struct inode *, struct dentry *, int);
static int nfs_rmdir(struct inode *, struct dentry *); static int nfs_rmdir(struct inode *, struct dentry *);
...@@ -294,24 +292,13 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) ...@@ -294,24 +292,13 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
return res; return res;
} }
static unsigned int nfs_type2dtype[] = { static inline unsigned int dt_type(struct inode *inode)
DT_UNKNOWN,
DT_REG,
DT_DIR,
DT_BLK,
DT_CHR,
DT_LNK,
DT_SOCK,
DT_UNKNOWN,
DT_FIFO
};
static inline
unsigned int nfs_type_to_d_type(enum nfs_ftype type)
{ {
return nfs_type2dtype[type]; return (inode->i_mode >> 12) & 15;
} }
static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc);
/* /*
* Once we've found the start of the dirent within a page: fill 'er up... * Once we've found the start of the dirent within a page: fill 'er up...
*/ */
...@@ -321,6 +308,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, ...@@ -321,6 +308,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
{ {
struct file *file = desc->file; struct file *file = desc->file;
struct nfs_entry *entry = desc->entry; struct nfs_entry *entry = desc->entry;
struct dentry *dentry = NULL;
unsigned long fileid; unsigned long fileid;
int loop_count = 0, int loop_count = 0,
res; res;
...@@ -333,9 +321,16 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, ...@@ -333,9 +321,16 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
* retrieving the current dirent on the server */ * retrieving the current dirent on the server */
fileid = nfs_fileid_to_ino_t(entry->ino); fileid = nfs_fileid_to_ino_t(entry->ino);
/* Get a dentry if we have one */
if (dentry != NULL)
dput(dentry);
dentry = nfs_readdir_lookup(desc);
/* Use readdirplus info */ /* Use readdirplus info */
if (desc->plus && (entry->fattr->valid & NFS_ATTR_FATTR)) if (dentry != NULL && dentry->d_inode != NULL) {
d_type = nfs_type_to_d_type(entry->fattr->type); d_type = dt_type(dentry->d_inode);
fileid = dentry->d_inode->i_ino;
}
res = filldir(dirent, entry->name, entry->len, res = filldir(dirent, entry->name, entry->len,
entry->prev_cookie, fileid, d_type); entry->prev_cookie, fileid, d_type);
...@@ -352,7 +347,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, ...@@ -352,7 +347,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
} }
} }
dir_page_release(desc); dir_page_release(desc);
if (dentry != NULL)
dput(dentry);
dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res); dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res);
return res; return res;
} }
...@@ -615,24 +611,10 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) ...@@ -615,24 +611,10 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
goto out_valid; goto out_valid;
} }
/*
* Note: we're not holding inode->i_sem and so may be racing with
* operations that change the directory. We therefore save the
* change attribute *before* we do the RPC call.
*/
verifier = nfs_save_change_attribute(dir);
error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr);
if (!error) {
if (nfs_compare_fh(NFS_FH(inode), &fhandle))
goto out_bad;
if (nfs_lookup_verify_inode(inode, isopen))
goto out_zap_parent;
goto out_valid_renew;
}
if (NFS_STALE(inode)) if (NFS_STALE(inode))
goto out_bad; goto out_bad;
verifier = nfs_save_change_attribute(dir);
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
if (error) if (error)
goto out_bad; goto out_bad;
...@@ -641,7 +623,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) ...@@ -641,7 +623,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
if ((error = nfs_refresh_inode(inode, &fattr)) != 0) if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
goto out_bad; goto out_bad;
out_valid_renew:
nfs_renew_times(dentry); nfs_renew_times(dentry);
nfs_set_verifier(dentry, verifier); nfs_set_verifier(dentry, verifier);
out_valid: out_valid:
...@@ -723,6 +704,7 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) ...@@ -723,6 +704,7 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
{ {
struct dentry *res;
struct inode *inode = NULL; struct inode *inode = NULL;
int error; int error;
struct nfs_fh fhandle; struct nfs_fh fhandle;
...@@ -731,11 +713,11 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru ...@@ -731,11 +713,11 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
dfprintk(VFS, "NFS: lookup(%s/%s)\n", dfprintk(VFS, "NFS: lookup(%s/%s)\n",
dentry->d_parent->d_name.name, dentry->d_name.name); dentry->d_parent->d_name.name, dentry->d_name.name);
error = -ENAMETOOLONG; res = ERR_PTR(-ENAMETOOLONG);
if (dentry->d_name.len > NFS_SERVER(dir)->namelen) if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
goto out; goto out;
error = -ENOMEM; res = ERR_PTR(-ENOMEM);
dentry->d_op = NFS_PROTO(dir)->dentry_ops; dentry->d_op = NFS_PROTO(dir)->dentry_ops;
lock_kernel(); lock_kernel();
...@@ -746,29 +728,27 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru ...@@ -746,29 +728,27 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
if (nfs_is_exclusive_create(dir, nd)) if (nfs_is_exclusive_create(dir, nd))
goto no_entry; goto no_entry;
error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
if (error != 0) { if (error == -ENOENT)
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, goto no_entry;
&fhandle, &fattr); if (error < 0) {
if (error == -ENOENT) res = ERR_PTR(error);
goto no_entry; goto out_unlock;
if (error != 0)
goto out_unlock;
} }
error = -EACCES; res = ERR_PTR(-EACCES);
inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
if (!inode) if (!inode)
goto out_unlock; goto out_unlock;
no_entry: no_entry:
error = 0; res = d_add_unique(dentry, inode);
d_add(dentry, inode); if (res != NULL)
dentry = res;
nfs_renew_times(dentry); nfs_renew_times(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
out_unlock: out_unlock:
unlock_kernel(); unlock_kernel();
out: out:
BUG_ON(error > 0); return res;
return ERR_PTR(error);
} }
#ifdef CONFIG_NFS_V4 #ifdef CONFIG_NFS_V4
...@@ -798,15 +778,15 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd) ...@@ -798,15 +778,15 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd)
static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{ {
struct dentry *res = NULL;
struct inode *inode = NULL; struct inode *inode = NULL;
int error = 0;
/* Check that we are indeed trying to open this file */ /* Check that we are indeed trying to open this file */
if (!is_atomic_open(dir, nd)) if (!is_atomic_open(dir, nd))
goto no_open; goto no_open;
if (dentry->d_name.len > NFS_SERVER(dir)->namelen) { if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
error = -ENAMETOOLONG; res = ERR_PTR(-ENAMETOOLONG);
goto out; goto out;
} }
dentry->d_op = NFS_PROTO(dir)->dentry_ops; dentry->d_op = NFS_PROTO(dir)->dentry_ops;
...@@ -828,7 +808,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry ...@@ -828,7 +808,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
inode = nfs4_atomic_open(dir, dentry, nd); inode = nfs4_atomic_open(dir, dentry, nd);
unlock_kernel(); unlock_kernel();
if (IS_ERR(inode)) { if (IS_ERR(inode)) {
error = PTR_ERR(inode); int error = PTR_ERR(inode);
switch (error) { switch (error) {
/* Make a negative dentry */ /* Make a negative dentry */
case -ENOENT: case -ENOENT:
...@@ -841,16 +821,18 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry ...@@ -841,16 +821,18 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
/* case -EISDIR: */ /* case -EISDIR: */
/* case -EINVAL: */ /* case -EINVAL: */
default: default:
res = ERR_PTR(error);
goto out; goto out;
} }
} }
no_entry: no_entry:
d_add(dentry, inode); res = d_add_unique(dentry, inode);
if (res != NULL)
dentry = res;
nfs_renew_times(dentry); nfs_renew_times(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
out: out:
BUG_ON(error > 0); return res;
return ERR_PTR(error);
no_open: no_open:
return nfs_lookup(dir, dentry, nd); return nfs_lookup(dir, dentry, nd);
} }
...@@ -906,83 +888,51 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) ...@@ -906,83 +888,51 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
} }
#endif /* CONFIG_NFSV4 */ #endif /* CONFIG_NFSV4 */
static inline static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry)
{ {
struct dentry *parent = desc->file->f_dentry;
struct inode *dir = parent->d_inode;
struct nfs_entry *entry = desc->entry; struct nfs_entry *entry = desc->entry;
int status; struct dentry *dentry, *alias;
struct qstr name = {
while((status = dir_decode(desc)) == 0) { .name = entry->name,
if (entry->len != dentry->d_name.len) .len = entry->len,
continue; };
if (memcmp(entry->name, dentry->d_name.name, entry->len)) struct inode *inode;
continue;
if (!(entry->fattr->valid & NFS_ATTR_FATTR))
continue;
break;
}
return status;
}
/*
* Use the cached Readdirplus results in order to avoid a LOOKUP call
* whenever we believe that the parent directory has not changed.
*
* We assume that any file creation/rename changes the directory mtime.
* As this results in a page cache invalidation whenever it occurs,
* we don't require any other tests for cache coherency.
*/
static
int nfs_cached_lookup(struct inode *dir, struct dentry *dentry,
struct nfs_fh *fh, struct nfs_fattr *fattr)
{
nfs_readdir_descriptor_t desc;
struct nfs_server *server;
struct nfs_entry entry;
struct page *page;
unsigned long timestamp;
int res;
if (!NFS_USE_READDIRPLUS(dir))
return -ENOENT;
server = NFS_SERVER(dir);
/* Don't use readdirplus unless the cache is stable */
if ((server->flags & NFS_MOUNT_NOAC) != 0
|| nfs_caches_unstable(dir)
|| nfs_attribute_timeout(dir))
return -ENOENT;
if ((NFS_FLAGS(dir) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) != 0)
return -ENOENT;
timestamp = NFS_I(dir)->readdir_timestamp;
entry.fh = fh;
entry.fattr = fattr;
desc.decode = NFS_PROTO(dir)->decode_dirent;
desc.entry = &entry;
desc.page_index = 0;
desc.plus = 1;
for(;(page = find_get_page(dir->i_mapping, desc.page_index)); desc.page_index++) {
res = -EIO;
if (PageUptodate(page)) {
void * kaddr = kmap_atomic(page, KM_USER0);
desc.ptr = kaddr;
res = find_dirent_name(&desc, page, dentry);
kunmap_atomic(kaddr, KM_USER0);
}
page_cache_release(page);
if (res == 0) switch (name.len) {
goto out_found; case 2:
if (res != -EAGAIN) if (name.name[0] == '.' && name.name[1] == '.')
return dget_parent(parent);
break; break;
case 1:
if (name.name[0] == '.')
return dget(parent);
} }
return -ENOENT; name.hash = full_name_hash(name.name, name.len);
out_found: dentry = d_lookup(parent, &name);
fattr->timestamp = timestamp; if (dentry != NULL)
return 0; return dentry;
if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
return NULL;
/* Note: caller is already holding the dir->i_sem! */
dentry = d_alloc(parent, &name);
if (dentry == NULL)
return NULL;
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
if (!inode) {
dput(dentry);
return NULL;
}
alias = d_add_unique(dentry, inode);
if (alias != NULL) {
dput(dentry);
dentry = alias;
}
nfs_renew_times(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
return dentry;
} }
/* /*
...@@ -1045,15 +995,9 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, ...@@ -1045,15 +995,9 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
if (nd && (nd->flags & LOOKUP_CREATE)) if (nd && (nd->flags & LOOKUP_CREATE))
open_flags = nd->intent.open.flags; open_flags = nd->intent.open.flags;
/*
* The 0 argument passed into the create function should one day
* contain the O_EXCL flag if requested. This allows NFSv3 to
* select the appropriate create strategy. Currently open_namei
* does not pass the create flags.
*/
lock_kernel(); lock_kernel();
nfs_begin_data_update(dir); nfs_begin_data_update(dir);
inode = NFS_PROTO(dir)->create(dir, &dentry->d_name, &attr, open_flags); inode = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
nfs_end_data_update(dir); nfs_end_data_update(dir);
if (!IS_ERR(inode)) { if (!IS_ERR(inode)) {
d_instantiate(dentry, inode); d_instantiate(dentry, inode);
...@@ -1438,7 +1382,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -1438,7 +1382,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto go_ahead; goto go_ahead;
if (S_ISDIR(new_inode->i_mode)) if (S_ISDIR(new_inode->i_mode))
goto out; goto out;
else if (atomic_read(&new_dentry->d_count) > 1) { else if (atomic_read(&new_dentry->d_count) > 2) {
int err; int err;
/* copy the target dentry's name */ /* copy the target dentry's name */
dentry = d_alloc(new_dentry->d_parent, dentry = d_alloc(new_dentry->d_parent,
...@@ -1453,10 +1397,8 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -1453,10 +1397,8 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_inode = NULL; new_inode = NULL;
/* instantiate the replacement target */ /* instantiate the replacement target */
d_instantiate(new_dentry, NULL); d_instantiate(new_dentry, NULL);
} } else if (atomic_read(&new_dentry->d_count) > 1) {
/* dentry still busy? */ /* dentry still busy? */
if (atomic_read(&new_dentry->d_count) > 1) {
#ifdef NFS_PARANOIA #ifdef NFS_PARANOIA
printk("nfs_rename: target %s/%s busy, d_count=%d\n", printk("nfs_rename: target %s/%s busy, d_count=%d\n",
new_dentry->d_parent->d_name.name, new_dentry->d_parent->d_name.name,
...@@ -1510,7 +1452,7 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs ...@@ -1510,7 +1452,7 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs
if (cache->cred != cred if (cache->cred != cred
|| time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
|| (NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR)) || (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS))
return -ENOENT; return -ENOENT;
memcpy(res, cache, sizeof(*res)); memcpy(res, cache, sizeof(*res));
return 0; return 0;
...@@ -1524,6 +1466,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) ...@@ -1524,6 +1466,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
if (cache->cred) if (cache->cred)
put_rpccred(cache->cred); put_rpccred(cache->cred);
cache->cred = get_rpccred(set->cred); cache->cred = get_rpccred(set->cred);
NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS;
} }
cache->jiffies = set->jiffies; cache->jiffies = set->jiffies;
cache->mask = set->mask; cache->mask = set->mask;
......
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
* 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy * 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy
* 08 Jun 2003 Port to 2.5 APIs --cel * 08 Jun 2003 Port to 2.5 APIs --cel
* 31 Mar 2004 Handle direct I/O without VFS support --cel * 31 Mar 2004 Handle direct I/O without VFS support --cel
* 15 Sep 2004 Parallel async reads --cel
* *
*/ */
...@@ -43,6 +44,7 @@ ...@@ -43,6 +44,7 @@
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/kref.h>
#include <linux/nfs_fs.h> #include <linux/nfs_fs.h>
#include <linux/nfs_page.h> #include <linux/nfs_page.h>
...@@ -50,11 +52,27 @@ ...@@ -50,11 +52,27 @@
#include <asm/system.h> #include <asm/system.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/atomic.h>
#define NFSDBG_FACILITY NFSDBG_VFS #define NFSDBG_FACILITY NFSDBG_VFS
#define VERF_SIZE (2 * sizeof(__u32))
#define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT) #define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT)
static kmem_cache_t *nfs_direct_cachep;
/*
* This represents a set of asynchronous requests that we're waiting on
*/
/*
 * One instance tracks a whole direct-read request split into multiple
 * NFS READ operations. Freed via kref when the last reference
 * (issuer or last completing I/O) is dropped — see
 * nfs_direct_req_release(). The complete/count/error fields are
 * atomics because they are updated from RPC completion context.
 */
struct nfs_direct_req {
struct kref kref; /* release manager */
struct list_head list; /* nfs_read_data structs */
wait_queue_head_t wait; /* wait for i/o completion */
struct page ** pages; /* pages in our buffer */
unsigned int npages; /* count of pages */
atomic_t complete, /* i/os we're waiting for */
count, /* bytes actually processed */
error; /* any reported error */
};
/** /**
* nfs_get_user_pages - find and set up pages underlying user's buffer * nfs_get_user_pages - find and set up pages underlying user's buffer
...@@ -71,7 +89,8 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, ...@@ -71,7 +89,8 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
unsigned long page_count; unsigned long page_count;
size_t array_size; size_t array_size;
/* set an arbitrary limit to prevent arithmetic overflow */ /* set an arbitrary limit to prevent type overflow */
/* XXX: this can probably be as large as INT_MAX */
if (size > MAX_DIRECTIO_SIZE) if (size > MAX_DIRECTIO_SIZE)
return -EFBIG; return -EFBIG;
...@@ -93,6 +112,8 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, ...@@ -93,6 +112,8 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
/** /**
* nfs_free_user_pages - tear down page struct array * nfs_free_user_pages - tear down page struct array
* @pages: array of page struct pointers underlying target buffer * @pages: array of page struct pointers underlying target buffer
* @npages: number of pages in the array
* @do_dirty: dirty the pages as we release them
*/ */
static void static void
nfs_free_user_pages(struct page **pages, int npages, int do_dirty) nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
...@@ -107,77 +128,231 @@ nfs_free_user_pages(struct page **pages, int npages, int do_dirty) ...@@ -107,77 +128,231 @@ nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
} }
/** /**
* nfs_direct_read_seg - Read in one iov segment. Generate separate * nfs_direct_req_release - release nfs_direct_req structure for direct read
* read RPCs for each "rsize" bytes. * @kref: kref object embedded in an nfs_direct_req structure
* @inode: target inode *
* @ctx: target file open context
* user_addr: starting address of this segment of user's buffer
* count: size of this segment
* file_offset: offset in file to begin the operation
* @pages: array of addresses of page structs defining user's buffer
* nr_pages: size of pages array
*/ */
static int static void nfs_direct_req_release(struct kref *kref)
nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx,
unsigned long user_addr, size_t count, loff_t file_offset,
struct page **pages, int nr_pages)
{ {
const unsigned int rsize = NFS_SERVER(inode)->rsize; struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
int tot_bytes = 0; kmem_cache_free(nfs_direct_cachep, dreq);
int curpage = 0; }
struct nfs_read_data rdata = {
.inode = inode, /**
.cred = ctx->cred, * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
.args = { * @count: count of bytes for the read request
.fh = NFS_FH(inode), * @rsize: local rsize setting
.context = ctx, *
}, * Note we also set the number of requests we have in the dreq when we are
.res = { * done. This prevents races with I/O completion so we will always wait
.fattr = &rdata.fattr, * until all requests have been dispatched and completed.
}, */
}; static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
{
struct list_head *list;
struct nfs_direct_req *dreq;
unsigned int reads = 0;
dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
if (!dreq)
return NULL;
kref_init(&dreq->kref);
init_waitqueue_head(&dreq->wait);
INIT_LIST_HEAD(&dreq->list);
atomic_set(&dreq->count, 0);
atomic_set(&dreq->error, 0);
list = &dreq->list;
for(;;) {
struct nfs_read_data *data = nfs_readdata_alloc();
if (unlikely(!data)) {
while (!list_empty(list)) {
data = list_entry(list->next,
struct nfs_read_data, pages);
list_del(&data->pages);
nfs_readdata_free(data);
}
kref_put(&dreq->kref, nfs_direct_req_release);
return NULL;
}
INIT_LIST_HEAD(&data->pages);
list_add(&data->pages, list);
rdata.args.pgbase = user_addr & ~PAGE_MASK; data->req = (struct nfs_page *) dreq;
rdata.args.offset = file_offset; reads++;
do { if (nbytes <= rsize)
int result; break;
nbytes -= rsize;
}
kref_get(&dreq->kref);
atomic_set(&dreq->complete, reads);
return dreq;
}
/**
* nfs_direct_read_result - handle a read reply for a direct read request
* @data: address of NFS READ operation control block
* @status: status of this NFS READ operation
*
* We must hold a reference to all the pages in this direct read request
* until the RPCs complete. This could be long *after* we are woken up in
* nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
*/
static void nfs_direct_read_result(struct nfs_read_data *data, int status)
{
struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
rdata.args.count = count; if (likely(status >= 0))
if (rdata.args.count > rsize) atomic_add(data->res.count, &dreq->count);
rdata.args.count = rsize; else
rdata.args.pages = &pages[curpage]; atomic_set(&dreq->error, status);
dprintk("NFS: direct read: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n", if (unlikely(atomic_dec_and_test(&dreq->complete))) {
rdata.args.count, (long long) rdata.args.offset, nfs_free_user_pages(dreq->pages, dreq->npages, 1);
user_addr + tot_bytes, rdata.args.pgbase, curpage); wake_up(&dreq->wait);
kref_put(&dreq->kref, nfs_direct_req_release);
}
}
/**
* nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
* @dreq: address of nfs_direct_req struct for this request
* @inode: target inode
* @ctx: target file open context
* @user_addr: starting address of this segment of user's buffer
* @count: size of this segment
* @file_offset: offset in file to begin the operation
*
* For each nfs_read_data struct that was allocated on the list, dispatch
* an NFS READ operation
*/
static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
struct inode *inode, struct nfs_open_context *ctx,
unsigned long user_addr, size_t count, loff_t file_offset)
{
struct list_head *list = &dreq->list;
struct page **pages = dreq->pages;
unsigned int curpage, pgbase;
unsigned int rsize = NFS_SERVER(inode)->rsize;
curpage = 0;
pgbase = user_addr & ~PAGE_MASK;
do {
struct nfs_read_data *data;
unsigned int bytes;
bytes = rsize;
if (count < rsize)
bytes = count;
data = list_entry(list->next, struct nfs_read_data, pages);
list_del_init(&data->pages);
data->inode = inode;
data->cred = ctx->cred;
data->args.fh = NFS_FH(inode);
data->args.context = ctx;
data->args.offset = file_offset;
data->args.pgbase = pgbase;
data->args.pages = &pages[curpage];
data->args.count = bytes;
data->res.fattr = &data->fattr;
data->res.eof = 0;
data->res.count = bytes;
NFS_PROTO(inode)->read_setup(data);
data->task.tk_cookie = (unsigned long) inode;
data->task.tk_calldata = data;
data->task.tk_release = nfs_readdata_release;
data->complete = nfs_direct_read_result;
lock_kernel(); lock_kernel();
result = NFS_PROTO(inode)->read(&rdata); rpc_execute(&data->task);
unlock_kernel(); unlock_kernel();
if (result <= 0) { dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
if (tot_bytes > 0) data->task.tk_pid,
break; inode->i_sb->s_id,
if (result == -EISDIR) (long long)NFS_FILEID(inode),
result = -EINVAL; bytes,
return result; (unsigned long long)data->args.offset);
}
tot_bytes += result; file_offset += bytes;
if (rdata.res.eof) pgbase += bytes;
break; curpage += pgbase >> PAGE_SHIFT;
pgbase &= ~PAGE_MASK;
rdata.args.offset += result; count -= bytes;
rdata.args.pgbase += result;
curpage += rdata.args.pgbase >> PAGE_SHIFT;
rdata.args.pgbase &= ~PAGE_MASK;
count -= result;
} while (count != 0); } while (count != 0);
}
/**
* nfs_direct_read_wait - wait for I/O completion for direct reads
* @dreq: request on which we are to wait
* @intr: whether or not this wait can be interrupted
*
* Collects and returns the final error value/byte-count.
*/
static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
{
int result = 0;
if (intr) {
result = wait_event_interruptible(dreq->wait,
(atomic_read(&dreq->complete) == 0));
} else {
wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
}
/* XXX: should we zero the rest of the user's buffer if we if (!result)
* hit eof? */ result = atomic_read(&dreq->error);
if (!result)
result = atomic_read(&dreq->count);
return tot_bytes; kref_put(&dreq->kref, nfs_direct_req_release);
return (ssize_t) result;
}
/**
* nfs_direct_read_seg - Read in one iov segment. Generate separate
* read RPCs for each "rsize" bytes.
* @inode: target inode
* @ctx: target file open context
* @user_addr: starting address of this segment of user's buffer
* @count: size of this segment
* @file_offset: offset in file to begin the operation
* @pages: array of addresses of page structs defining user's buffer
* @nr_pages: number of pages in the array
*
*/
static ssize_t nfs_direct_read_seg(struct inode *inode,
struct nfs_open_context *ctx, unsigned long user_addr,
size_t count, loff_t file_offset, struct page **pages,
unsigned int nr_pages)
{
ssize_t result;
sigset_t oldset;
struct rpc_clnt *clnt = NFS_CLIENT(inode);
struct nfs_direct_req *dreq;
dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
if (!dreq)
return -ENOMEM;
dreq->pages = pages;
dreq->npages = nr_pages;
rpc_clnt_sigmask(clnt, &oldset);
nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
file_offset);
result = nfs_direct_read_wait(dreq, clnt->cl_intr);
rpc_clnt_sigunmask(clnt, &oldset);
return result;
} }
/** /**
...@@ -189,9 +364,8 @@ nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx, ...@@ -189,9 +364,8 @@ nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx,
* file_offset: offset in file to begin the operation * file_offset: offset in file to begin the operation
* nr_segs: size of iovec array * nr_segs: size of iovec array
* *
* generic_file_direct_IO has already pushed out any non-direct * We've already pushed out any non-direct writes so that this read
* writes so that this read will see them when we read from the * will see them when we read from the server.
* server.
*/ */
static ssize_t static ssize_t
nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
...@@ -220,8 +394,6 @@ nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, ...@@ -220,8 +394,6 @@ nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
result = nfs_direct_read_seg(inode, ctx, user_addr, size, result = nfs_direct_read_seg(inode, ctx, user_addr, size,
file_offset, pages, page_count); file_offset, pages, page_count);
nfs_free_user_pages(pages, page_count, 1);
if (result <= 0) { if (result <= 0) {
if (tot_bytes > 0) if (tot_bytes > 0)
break; break;
...@@ -247,31 +419,31 @@ nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, ...@@ -247,31 +419,31 @@ nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
* @pages: array of addresses of page structs defining user's buffer * @pages: array of addresses of page structs defining user's buffer
* nr_pages: size of pages array * nr_pages: size of pages array
*/ */
static int static ssize_t nfs_direct_write_seg(struct inode *inode,
nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, struct nfs_open_context *ctx, unsigned long user_addr,
unsigned long user_addr, size_t count, loff_t file_offset, size_t count, loff_t file_offset, struct page **pages,
struct page **pages, int nr_pages) int nr_pages)
{ {
const unsigned int wsize = NFS_SERVER(inode)->wsize; const unsigned int wsize = NFS_SERVER(inode)->wsize;
size_t request; size_t request;
int curpage, need_commit, result, tot_bytes; int curpage, need_commit;
ssize_t result, tot_bytes;
struct nfs_writeverf first_verf; struct nfs_writeverf first_verf;
struct nfs_write_data wdata = { struct nfs_write_data *wdata;
.inode = inode,
.cred = ctx->cred,
.args = {
.fh = NFS_FH(inode),
.context = ctx,
},
.res = {
.fattr = &wdata.fattr,
.verf = &wdata.verf,
},
};
wdata.args.stable = NFS_UNSTABLE; wdata = nfs_writedata_alloc();
if (!wdata)
return -ENOMEM;
wdata->inode = inode;
wdata->cred = ctx->cred;
wdata->args.fh = NFS_FH(inode);
wdata->args.context = ctx;
wdata->args.stable = NFS_UNSTABLE;
if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize) if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
wdata.args.stable = NFS_FILE_SYNC; wdata->args.stable = NFS_FILE_SYNC;
wdata->res.fattr = &wdata->fattr;
wdata->res.verf = &wdata->verf;
nfs_begin_data_update(inode); nfs_begin_data_update(inode);
retry: retry:
...@@ -279,20 +451,20 @@ nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, ...@@ -279,20 +451,20 @@ nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
tot_bytes = 0; tot_bytes = 0;
curpage = 0; curpage = 0;
request = count; request = count;
wdata.args.pgbase = user_addr & ~PAGE_MASK; wdata->args.pgbase = user_addr & ~PAGE_MASK;
wdata.args.offset = file_offset; wdata->args.offset = file_offset;
do { do {
wdata.args.count = request; wdata->args.count = request;
if (wdata.args.count > wsize) if (wdata->args.count > wsize)
wdata.args.count = wsize; wdata->args.count = wsize;
wdata.args.pages = &pages[curpage]; wdata->args.pages = &pages[curpage];
dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n", dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
wdata.args.count, (long long) wdata.args.offset, wdata->args.count, (long long) wdata->args.offset,
user_addr + tot_bytes, wdata.args.pgbase, curpage); user_addr + tot_bytes, wdata->args.pgbase, curpage);
lock_kernel(); lock_kernel();
result = NFS_PROTO(inode)->write(&wdata); result = NFS_PROTO(inode)->write(wdata);
unlock_kernel(); unlock_kernel();
if (result <= 0) { if (result <= 0) {
...@@ -302,20 +474,25 @@ nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, ...@@ -302,20 +474,25 @@ nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
} }
if (tot_bytes == 0) if (tot_bytes == 0)
memcpy(&first_verf.verifier, &wdata.verf.verifier, memcpy(&first_verf.verifier, &wdata->verf.verifier,
VERF_SIZE); sizeof(first_verf.verifier));
if (wdata.verf.committed != NFS_FILE_SYNC) { if (wdata->verf.committed != NFS_FILE_SYNC) {
need_commit = 1; need_commit = 1;
if (memcmp(&first_verf.verifier, if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
&wdata.verf.verifier, VERF_SIZE)) sizeof(first_verf.verifier)));
goto sync_retry; goto sync_retry;
} }
tot_bytes += result; tot_bytes += result;
wdata.args.offset += result;
wdata.args.pgbase += result; /* in case of a short write: stop now, let the app recover */
curpage += wdata.args.pgbase >> PAGE_SHIFT; if (result < wdata->args.count)
wdata.args.pgbase &= ~PAGE_MASK; break;
wdata->args.offset += result;
wdata->args.pgbase += result;
curpage += wdata->args.pgbase >> PAGE_SHIFT;
wdata->args.pgbase &= ~PAGE_MASK;
request -= result; request -= result;
} while (request != 0); } while (request != 0);
...@@ -323,27 +500,27 @@ nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, ...@@ -323,27 +500,27 @@ nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
* Commit data written so far, even in the event of an error * Commit data written so far, even in the event of an error
*/ */
if (need_commit) { if (need_commit) {
wdata.args.count = tot_bytes; wdata->args.count = tot_bytes;
wdata.args.offset = file_offset; wdata->args.offset = file_offset;
lock_kernel(); lock_kernel();
result = NFS_PROTO(inode)->commit(&wdata); result = NFS_PROTO(inode)->commit(wdata);
unlock_kernel(); unlock_kernel();
if (result < 0 || memcmp(&first_verf.verifier, if (result < 0 || memcmp(&first_verf.verifier,
&wdata.verf.verifier, &wdata->verf.verifier,
VERF_SIZE) != 0) sizeof(first_verf.verifier)) != 0)
goto sync_retry; goto sync_retry;
} }
result = tot_bytes; result = tot_bytes;
out: out:
nfs_end_data_update_defer(inode); nfs_end_data_update_defer(inode);
nfs_writedata_free(wdata);
return result; return result;
sync_retry: sync_retry:
wdata.args.stable = NFS_FILE_SYNC; wdata->args.stable = NFS_FILE_SYNC;
goto retry; goto retry;
} }
...@@ -360,9 +537,9 @@ nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, ...@@ -360,9 +537,9 @@ nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
* that non-direct readers might access, so they will pick up these * that non-direct readers might access, so they will pick up these
* writes immediately. * writes immediately.
*/ */
static int nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, static ssize_t nfs_direct_write(struct inode *inode,
const struct iovec *iov, loff_t file_offset, struct nfs_open_context *ctx, const struct iovec *iov,
unsigned long nr_segs) loff_t file_offset, unsigned long nr_segs)
{ {
ssize_t tot_bytes = 0; ssize_t tot_bytes = 0;
unsigned long seg = 0; unsigned long seg = 0;
...@@ -501,6 +678,8 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t ...@@ -501,6 +678,8 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
if (mapping->nrpages) { if (mapping->nrpages) {
retval = filemap_fdatawrite(mapping); retval = filemap_fdatawrite(mapping);
if (retval == 0)
retval = nfs_wb_all(inode);
if (retval == 0) if (retval == 0)
retval = filemap_fdatawait(mapping); retval = filemap_fdatawait(mapping);
if (retval) if (retval)
...@@ -590,6 +769,8 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, ...@@ -590,6 +769,8 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
if (mapping->nrpages) { if (mapping->nrpages) {
retval = filemap_fdatawrite(mapping); retval = filemap_fdatawrite(mapping);
if (retval == 0)
retval = nfs_wb_all(inode);
if (retval == 0) if (retval == 0)
retval = filemap_fdatawait(mapping); retval = filemap_fdatawait(mapping);
if (retval) if (retval)
...@@ -605,3 +786,21 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, ...@@ -605,3 +786,21 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
out: out:
return retval; return retval;
} }
int nfs_init_directcache(void)
{
nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
sizeof(struct nfs_direct_req),
0, SLAB_RECLAIM_ACCOUNT,
NULL, NULL);
if (nfs_direct_cachep == NULL)
return -ENOMEM;
return 0;
}
void nfs_destroy_directcache(void)
{
if (kmem_cache_destroy(nfs_direct_cachep))
printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n");
}
...@@ -295,10 +295,19 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t ...@@ -295,10 +295,19 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
{ {
struct inode *inode = filp->f_mapping->host; struct inode *inode = filp->f_mapping->host;
int status; int status = 0;
lock_kernel(); lock_kernel();
status = NFS_PROTO(inode)->lock(filp, cmd, fl); /* Use local locking if mounted with "-onolock" */
if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
else {
struct file_lock *cfl = posix_test_lock(filp, fl);
if (cfl != NULL) {
memcpy(fl, cfl, sizeof(*fl));
fl->fl_type = F_UNLCK;
}
}
unlock_kernel(); unlock_kernel();
return status; return status;
} }
...@@ -325,7 +334,11 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) ...@@ -325,7 +334,11 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
* still need to complete the unlock. * still need to complete the unlock.
*/ */
lock_kernel(); lock_kernel();
status = NFS_PROTO(inode)->lock(filp, cmd, fl); /* Use local locking if mounted with "-onolock" */
if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
else
status = posix_lock_file_wait(filp, fl);
rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset); rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
return status; return status;
} }
...@@ -351,15 +364,19 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) ...@@ -351,15 +364,19 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
return status; return status;
lock_kernel(); lock_kernel();
status = NFS_PROTO(inode)->lock(filp, cmd, fl); /* Use local locking if mounted with "-onolock" */
/* If we were signalled we still need to ensure that if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) {
* we clean up any state on the server. We therefore status = NFS_PROTO(inode)->lock(filp, cmd, fl);
* record the lock call as having succeeded in order to /* If we were signalled we still need to ensure that
* ensure that locks_remove_posix() cleans it out when * we clean up any state on the server. We therefore
* the process exits. * record the lock call as having succeeded in order to
*/ * ensure that locks_remove_posix() cleans it out when
if (status == -EINTR || status == -ERESTARTSYS) * the process exits.
posix_lock_file(filp, fl); */
if (status == -EINTR || status == -ERESTARTSYS)
posix_lock_file(filp, fl);
} else
status = posix_lock_file_wait(filp, fl);
unlock_kernel(); unlock_kernel();
if (status < 0) if (status < 0)
return status; return status;
...@@ -396,15 +413,6 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl) ...@@ -396,15 +413,6 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
return -ENOLCK; return -ENOLCK;
if (NFS_PROTO(inode)->version != 4) {
/* Fake OK code if mounted without NLM support */
if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) {
if (IS_GETLK(cmd))
return LOCK_USE_CLNT;
return 0;
}
}
/* /*
* No BSD flocks over NFS allowed. * No BSD flocks over NFS allowed.
* Note: we could try to fake a POSIX lock request here by * Note: we could try to fake a POSIX lock request here by
......
...@@ -486,13 +486,27 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf) ...@@ -486,13 +486,27 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf)
if (error < 0) if (error < 0)
goto out_err; goto out_err;
buf->f_frsize = server->wtmult; /*
* Current versions of glibc do not correctly handle the
* case where f_frsize != f_bsize. Eventually we want to
* report the value of wtmult in this field.
*/
buf->f_frsize = sb->s_blocksize;
/*
* On most *nix systems, f_blocks, f_bfree, and f_bavail
* are reported in units of f_frsize. Linux hasn't had
* an f_frsize field in its statfs struct until recently,
* thus historically Linux's sys_statfs reports these
* fields in units of f_bsize.
*/
buf->f_bsize = sb->s_blocksize; buf->f_bsize = sb->s_blocksize;
blockbits = sb->s_blocksize_bits; blockbits = sb->s_blocksize_bits;
blockres = (1 << blockbits) - 1; blockres = (1 << blockbits) - 1;
buf->f_blocks = (res.tbytes + blockres) >> blockbits; buf->f_blocks = (res.tbytes + blockres) >> blockbits;
buf->f_bfree = (res.fbytes + blockres) >> blockbits; buf->f_bfree = (res.fbytes + blockres) >> blockbits;
buf->f_bavail = (res.abytes + blockres) >> blockbits; buf->f_bavail = (res.abytes + blockres) >> blockbits;
buf->f_files = res.tfiles; buf->f_files = res.tfiles;
buf->f_ffree = res.afiles; buf->f_ffree = res.afiles;
...@@ -565,9 +579,9 @@ nfs_zap_caches(struct inode *inode) ...@@ -565,9 +579,9 @@ nfs_zap_caches(struct inode *inode)
memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
else else
nfsi->flags |= NFS_INO_INVALID_ATTR; nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
} }
/* /*
...@@ -605,7 +619,7 @@ nfs_find_actor(struct inode *inode, void *opaque) ...@@ -605,7 +619,7 @@ nfs_find_actor(struct inode *inode, void *opaque)
return 0; return 0;
if (nfs_compare_fh(NFS_FH(inode), fh)) if (nfs_compare_fh(NFS_FH(inode), fh))
return 0; return 0;
if (is_bad_inode(inode)) if (is_bad_inode(inode) || NFS_STALE(inode))
return 0; return 0;
return 1; return 1;
} }
...@@ -766,13 +780,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) ...@@ -766,13 +780,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
vmtruncate(inode, attr->ia_size); vmtruncate(inode, attr->ia_size);
} }
} }
if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
if (*cred) {
put_rpccred(*cred);
*cred = NULL;
}
}
nfs_end_data_update(inode); nfs_end_data_update(inode);
unlock_kernel(); unlock_kernel();
return error; return error;
...@@ -949,14 +958,14 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) ...@@ -949,14 +958,14 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
lock_kernel(); lock_kernel();
if (!inode || is_bad_inode(inode)) if (!inode || is_bad_inode(inode))
goto out_nowait; goto out_nowait;
if (NFS_STALE(inode) && inode != inode->i_sb->s_root->d_inode) if (NFS_STALE(inode))
goto out_nowait; goto out_nowait;
while (NFS_REVALIDATING(inode)) { while (NFS_REVALIDATING(inode)) {
status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING); status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING);
if (status < 0) if (status < 0)
goto out_nowait; goto out_nowait;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOAC) if (NFS_ATTRTIMEO(inode) == 0)
continue; continue;
if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME))
continue; continue;
...@@ -968,14 +977,14 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) ...@@ -968,14 +977,14 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
/* Protect against RPC races by saving the change attribute */ /* Protect against RPC races by saving the change attribute */
verifier = nfs_save_change_attribute(inode); verifier = nfs_save_change_attribute(inode);
status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
if (status) { if (status != 0) {
dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
inode->i_sb->s_id, inode->i_sb->s_id,
(long long)NFS_FILEID(inode), status); (long long)NFS_FILEID(inode), status);
if (status == -ESTALE) { if (status == -ESTALE) {
NFS_FLAGS(inode) |= NFS_INO_STALE; nfs_zap_caches(inode);
if (inode != inode->i_sb->s_root->d_inode) if (!S_ISDIR(inode->i_mode))
remove_inode_hash(inode); NFS_FLAGS(inode) |= NFS_INO_STALE;
} }
goto out; goto out;
} }
...@@ -1014,7 +1023,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) ...@@ -1014,7 +1023,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
inode->i_sb->s_id, inode->i_sb->s_id,
(long long)NFS_FILEID(inode)); (long long)NFS_FILEID(inode));
NFS_FLAGS(inode) &= ~NFS_INO_STALE;
out: out:
NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING;
wake_up(&nfsi->nfs_i_wait); wake_up(&nfsi->nfs_i_wait);
...@@ -1161,7 +1169,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) ...@@ -1161,7 +1169,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
|| inode->i_uid != fattr->uid || inode->i_uid != fattr->uid
|| inode->i_gid != fattr->gid) || inode->i_gid != fattr->gid)
nfsi->flags |= NFS_INO_INVALID_ATTR; nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
/* Has the link count changed? */ /* Has the link count changed? */
if (inode->i_nlink != fattr->nlink) if (inode->i_nlink != fattr->nlink)
...@@ -1270,7 +1278,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign ...@@ -1270,7 +1278,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
#endif #endif
nfsi->change_attr = fattr->change_attr; nfsi->change_attr = fattr->change_attr;
if (!data_unstable) if (!data_unstable)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
} }
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
...@@ -1278,14 +1286,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign ...@@ -1278,14 +1286,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
inode->i_uid != fattr->uid || inode->i_uid != fattr->uid ||
inode->i_gid != fattr->gid) { inode->i_gid != fattr->gid)
struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
if (*cred) {
put_rpccred(*cred);
*cred = NULL;
}
invalid |= NFS_INO_INVALID_ATTR;
}
inode->i_mode = fattr->mode; inode->i_mode = fattr->mode;
inode->i_nlink = fattr->nlink; inode->i_nlink = fattr->nlink;
...@@ -1335,7 +1337,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign ...@@ -1335,7 +1337,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
*/ */
nfs_invalidate_inode(inode); nfs_invalidate_inode(inode);
out_err: out_err:
return -EIO; NFS_FLAGS(inode) |= NFS_INO_STALE;
return -ESTALE;
} }
/* /*
...@@ -1449,8 +1452,6 @@ static void nfs_kill_super(struct super_block *s) ...@@ -1449,8 +1452,6 @@ static void nfs_kill_super(struct super_block *s)
kill_anon_super(s); kill_anon_super(s);
nfs4_renewd_prepare_shutdown(server);
if (server->client != NULL && !IS_ERR(server->client)) if (server->client != NULL && !IS_ERR(server->client))
rpc_shutdown_client(server->client); rpc_shutdown_client(server->client);
if (server->client_sys != NULL && !IS_ERR(server->client_sys)) if (server->client_sys != NULL && !IS_ERR(server->client_sys))
...@@ -1461,8 +1462,6 @@ static void nfs_kill_super(struct super_block *s) ...@@ -1461,8 +1462,6 @@ static void nfs_kill_super(struct super_block *s)
rpciod_down(); /* release rpciod */ rpciod_down(); /* release rpciod */
destroy_nfsv4_state(server);
if (server->hostname != NULL) if (server->hostname != NULL)
kfree(server->hostname); kfree(server->hostname);
kfree(server); kfree(server);
...@@ -1543,9 +1542,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, ...@@ -1543,9 +1542,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
server->wsize = nfs_block_size(data->wsize, NULL); server->wsize = nfs_block_size(data->wsize, NULL);
server->flags = data->flags & NFS_MOUNT_FLAGMASK; server->flags = data->flags & NFS_MOUNT_FLAGMASK;
/* NFSv4 doesn't use NLM locking */
server->flags |= NFS_MOUNT_NONLM;
server->acregmin = data->acregmin*HZ; server->acregmin = data->acregmin*HZ;
server->acregmax = data->acregmax*HZ; server->acregmax = data->acregmax*HZ;
server->acdirmin = data->acdirmin*HZ; server->acdirmin = data->acdirmin*HZ;
...@@ -1790,8 +1786,22 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, ...@@ -1790,8 +1786,22 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
static void nfs4_kill_super(struct super_block *sb) static void nfs4_kill_super(struct super_block *sb)
{ {
struct nfs_server *server = NFS_SB(sb);
nfs_return_all_delegations(sb); nfs_return_all_delegations(sb);
nfs_kill_super(sb); kill_anon_super(sb);
nfs4_renewd_prepare_shutdown(server);
if (server->client != NULL && !IS_ERR(server->client))
rpc_shutdown_client(server->client);
rpciod_down(); /* release rpciod */
destroy_nfsv4_state(server);
if (server->hostname != NULL)
kfree(server->hostname);
kfree(server);
} }
static struct file_system_type nfs4_fs_type = { static struct file_system_type nfs4_fs_type = {
...@@ -1821,9 +1831,13 @@ static struct file_system_type nfs4_fs_type = { ...@@ -1821,9 +1831,13 @@ static struct file_system_type nfs4_fs_type = {
extern int nfs_init_nfspagecache(void); extern int nfs_init_nfspagecache(void);
extern void nfs_destroy_nfspagecache(void); extern void nfs_destroy_nfspagecache(void);
extern int nfs_init_readpagecache(void); extern int nfs_init_readpagecache(void);
extern int nfs_destroy_readpagecache(void); extern void nfs_destroy_readpagecache(void);
extern int nfs_init_writepagecache(void); extern int nfs_init_writepagecache(void);
extern int nfs_destroy_writepagecache(void); extern void nfs_destroy_writepagecache(void);
#ifdef CONFIG_NFS_DIRECTIO
extern int nfs_init_directcache(void);
extern void nfs_destroy_directcache(void);
#endif
static kmem_cache_t * nfs_inode_cachep; static kmem_cache_t * nfs_inode_cachep;
...@@ -1904,6 +1918,12 @@ static int __init init_nfs_fs(void) ...@@ -1904,6 +1918,12 @@ static int __init init_nfs_fs(void)
if (err) if (err)
goto out1; goto out1;
#ifdef CONFIG_NFS_DIRECTIO
err = nfs_init_directcache();
if (err)
goto out0;
#endif
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
rpc_proc_register(&nfs_rpcstat); rpc_proc_register(&nfs_rpcstat);
#endif #endif
...@@ -1914,8 +1934,14 @@ static int __init init_nfs_fs(void) ...@@ -1914,8 +1934,14 @@ static int __init init_nfs_fs(void)
goto out; goto out;
return 0; return 0;
out: out:
#ifdef CONFIG_PROC_FS
rpc_proc_unregister("nfs"); rpc_proc_unregister("nfs");
#endif
nfs_destroy_writepagecache(); nfs_destroy_writepagecache();
#ifdef CONFIG_NFS_DIRECTIO
out0:
nfs_destroy_directcache();
#endif
out1: out1:
nfs_destroy_readpagecache(); nfs_destroy_readpagecache();
out2: out2:
...@@ -1928,6 +1954,9 @@ static int __init init_nfs_fs(void) ...@@ -1928,6 +1954,9 @@ static int __init init_nfs_fs(void)
static void __exit exit_nfs_fs(void) static void __exit exit_nfs_fs(void)
{ {
#ifdef CONFIG_NFS_DIRECTIO
nfs_destroy_directcache();
#endif
nfs_destroy_writepagecache(); nfs_destroy_writepagecache();
nfs_destroy_readpagecache(); nfs_destroy_readpagecache();
nfs_destroy_inodecache(); nfs_destroy_inodecache();
......
...@@ -80,10 +80,10 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, ...@@ -80,10 +80,10 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
dprintk("%s: call fsinfo\n", __FUNCTION__); dprintk("%s: call fsinfo\n", __FUNCTION__);
info->fattr->valid = 0; info->fattr->valid = 0;
status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
dprintk("%s: reply fsinfo %d\n", __FUNCTION__, status); dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
if (!(info->fattr->valid & NFS_ATTR_FATTR)) { if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0); status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
dprintk("%s: reply getattr %d\n", __FUNCTION__, status); dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
} }
return status; return status;
} }
...@@ -101,7 +101,7 @@ nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, ...@@ -101,7 +101,7 @@ nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
fattr->valid = 0; fattr->valid = 0;
status = rpc_call(server->client, NFS3PROC_GETATTR, status = rpc_call(server->client, NFS3PROC_GETATTR,
fhandle, fattr, 0); fhandle, fattr, 0);
dprintk("NFS reply getattr\n"); dprintk("NFS reply getattr: %d\n", status);
return status; return status;
} }
...@@ -119,7 +119,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, ...@@ -119,7 +119,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
dprintk("NFS call setattr\n"); dprintk("NFS call setattr\n");
fattr->valid = 0; fattr->valid = 0;
status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0); status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
dprintk("NFS reply setattr\n"); dprintk("NFS reply setattr: %d\n", status);
return status; return status;
} }
...@@ -198,7 +198,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) ...@@ -198,7 +198,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE)) if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
entry->mask |= MAY_EXEC; entry->mask |= MAY_EXEC;
} }
dprintk("NFS reply access, status = %d\n", status); dprintk("NFS reply access: %d\n", status);
return status; return status;
} }
...@@ -296,7 +296,7 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata) ...@@ -296,7 +296,7 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata)
* For now, we don't implement O_EXCL. * For now, we don't implement O_EXCL.
*/ */
static struct inode * static struct inode *
nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr, nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags) int flags)
{ {
struct nfs_fh fhandle; struct nfs_fh fhandle;
...@@ -304,8 +304,8 @@ nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr, ...@@ -304,8 +304,8 @@ nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
struct nfs_fattr dir_attr; struct nfs_fattr dir_attr;
struct nfs3_createargs arg = { struct nfs3_createargs arg = {
.fh = NFS_FH(dir), .fh = NFS_FH(dir),
.name = name->name, .name = dentry->d_name.name,
.len = name->len, .len = dentry->d_name.len,
.sattr = sattr, .sattr = sattr,
}; };
struct nfs3_diropres res = { struct nfs3_diropres res = {
...@@ -315,7 +315,7 @@ nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr, ...@@ -315,7 +315,7 @@ nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
}; };
int status; int status;
dprintk("NFS call create %s\n", name->name); dprintk("NFS call create %s\n", dentry->d_name.name);
arg.createmode = NFS3_CREATE_UNCHECKED; arg.createmode = NFS3_CREATE_UNCHECKED;
if (flags & O_EXCL) { if (flags & O_EXCL) {
arg.createmode = NFS3_CREATE_EXCLUSIVE; arg.createmode = NFS3_CREATE_EXCLUSIVE;
...@@ -353,7 +353,7 @@ nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr, ...@@ -353,7 +353,7 @@ nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
if (status != 0) if (status != 0)
goto out; goto out;
if (fhandle.size == 0 || !(fattr.valid & NFS_ATTR_FATTR)) { if (fhandle.size == 0 || !(fattr.valid & NFS_ATTR_FATTR)) {
status = nfs3_proc_lookup(dir, name, &fhandle, &fattr); status = nfs3_proc_lookup(dir, &dentry->d_name, &fhandle, &fattr);
if (status != 0) if (status != 0)
goto out; goto out;
} }
......
...@@ -477,7 +477,7 @@ static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, st ...@@ -477,7 +477,7 @@ static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, st
/* /*
* Returns an nfs4_state + an referenced inode * Returns an nfs4_state + an referenced inode
*/ */
static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
{ {
struct nfs4_state_owner *sp; struct nfs4_state_owner *sp;
struct nfs4_state *state = NULL; struct nfs4_state *state = NULL;
...@@ -491,7 +491,7 @@ static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct ...@@ -491,7 +491,7 @@ static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct
struct nfs_openargs o_arg = { struct nfs_openargs o_arg = {
.fh = NFS_FH(dir), .fh = NFS_FH(dir),
.open_flags = flags, .open_flags = flags,
.name = name, .name = &dentry->d_name,
.server = server, .server = server,
.bitmask = server->attr_bitmask, .bitmask = server->attr_bitmask,
.claim = NFS4_OPEN_CLAIM_NULL, .claim = NFS4_OPEN_CLAIM_NULL,
...@@ -581,14 +581,14 @@ static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct ...@@ -581,14 +581,14 @@ static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct
} }
struct nfs4_state *nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred) struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
{ {
struct nfs4_exception exception = { }; struct nfs4_exception exception = { };
struct nfs4_state *res; struct nfs4_state *res;
int status; int status;
do { do {
status = _nfs4_do_open(dir, name, flags, sattr, cred, &res); status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
if (status == 0) if (status == 0)
break; break;
/* NOTE: BAD_SEQID means the server and client disagree about the /* NOTE: BAD_SEQID means the server and client disagree about the
...@@ -635,6 +635,8 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, ...@@ -635,6 +635,8 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
fattr->valid = 0; fattr->valid = 0;
if (state != NULL)
msg.rpc_cred = state->owner->so_cred;
if (sattr->ia_valid & ATTR_SIZE) if (sattr->ia_valid & ATTR_SIZE)
nfs4_copy_stateid(&arg.stateid, state, NULL); nfs4_copy_stateid(&arg.stateid, state, NULL);
else else
...@@ -658,6 +660,61 @@ int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, ...@@ -658,6 +660,61 @@ int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
return err; return err;
} }
struct nfs4_closedata {
struct inode *inode;
struct nfs4_state *state;
struct nfs_closeargs arg;
struct nfs_closeres res;
};
static void nfs4_close_done(struct rpc_task *task)
{
struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
struct nfs4_state *state = calldata->state;
struct nfs4_state_owner *sp = state->owner;
struct nfs_server *server = NFS_SERVER(calldata->inode);
/* hmm. we are done with the inode, and in the process of freeing
* the state_owner. we keep this around to process errors
*/
nfs4_increment_seqid(task->tk_status, sp);
switch (task->tk_status) {
case 0:
state->state = calldata->arg.open_flags;
memcpy(&state->stateid, &calldata->res.stateid,
sizeof(state->stateid));
break;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
state->state = calldata->arg.open_flags;
nfs4_schedule_state_recovery(server->nfs4_state);
break;
default:
if (nfs4_async_handle_error(task, server) == -EAGAIN) {
rpc_restart_call(task);
return;
}
}
nfs4_put_open_state(state);
up(&sp->so_sema);
nfs4_put_state_owner(sp);
up_read(&server->nfs4_state->cl_sem);
kfree(calldata);
}
static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata)
{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
.rpc_argp = &calldata->arg,
.rpc_resp = &calldata->res,
.rpc_cred = calldata->state->owner->so_cred,
};
if (calldata->arg.open_flags != 0)
msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata);
}
/* /*
* It is possible for data to be read/written from a mem-mapped file * It is possible for data to be read/written from a mem-mapped file
* after the sys_close call (which hits the vfs layer as a flush). * after the sys_close call (which hits the vfs layer as a flush).
...@@ -669,102 +726,34 @@ int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, ...@@ -669,102 +726,34 @@ int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
* *
* NOTE: Caller must be holding the sp->so_owner semaphore! * NOTE: Caller must be holding the sp->so_owner semaphore!
*/ */
static int _nfs4_do_close(struct inode *inode, struct nfs4_state *state) int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode)
{ {
struct nfs4_state_owner *sp = state->owner; struct nfs4_closedata *calldata;
int status = 0; int status;
struct nfs_closeargs arg = {
.fh = NFS_FH(inode),
};
struct nfs_closeres res;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
.rpc_argp = &arg,
.rpc_resp = &res,
};
if (test_bit(NFS_DELEGATED_STATE, &state->flags)) /* Tell caller we're done */
if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
state->state = mode;
return 0; return 0;
memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid)); }
calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL);
if (calldata == NULL)
return -ENOMEM;
calldata->inode = inode;
calldata->state = state;
calldata->arg.fh = NFS_FH(inode);
/* Serialization for the sequence id */ /* Serialization for the sequence id */
arg.seqid = sp->so_seqid, calldata->arg.seqid = state->owner->so_seqid;
status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, RPC_TASK_NOINTR); calldata->arg.open_flags = mode;
memcpy(&calldata->arg.stateid, &state->stateid,
/* hmm. we are done with the inode, and in the process of freeing sizeof(calldata->arg.stateid));
* the state_owner. we keep this around to process errors status = nfs4_close_call(NFS_SERVER(inode)->client, calldata);
/*
* Return -EINPROGRESS on success in order to indicate to the
* caller that an asynchronous RPC call has been launched, and
* that it will release the semaphores on completion.
*/ */
nfs4_increment_seqid(status, sp); return (status == 0) ? -EINPROGRESS : status;
if (!status)
memcpy(&state->stateid, &res.stateid, sizeof(state->stateid));
return status;
}
int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
{
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_exception exception = { };
int err;
do {
err = _nfs4_do_close(inode, state);
switch (err) {
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
nfs4_schedule_state_recovery(server->nfs4_state);
err = 0;
default:
state->state = 0;
}
err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
return err;
}
static int _nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode)
{
struct nfs4_state_owner *sp = state->owner;
int status = 0;
struct nfs_closeargs arg = {
.fh = NFS_FH(inode),
.seqid = sp->so_seqid,
.open_flags = mode,
};
struct nfs_closeres res;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE],
.rpc_argp = &arg,
.rpc_resp = &res,
};
if (test_bit(NFS_DELEGATED_STATE, &state->flags))
return 0;
memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid));
status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, RPC_TASK_NOINTR);
nfs4_increment_seqid(status, sp);
if (!status)
memcpy(&state->stateid, &res.stateid, sizeof(state->stateid));
return status;
}
int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode)
{
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_exception exception = { };
int err;
do {
err = _nfs4_do_downgrade(inode, state, mode);
switch (err) {
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
nfs4_schedule_state_recovery(server->nfs4_state);
err = 0;
default:
state->state = mode;
}
err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
return err;
} }
struct inode * struct inode *
...@@ -785,7 +774,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) ...@@ -785,7 +774,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
} }
cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred); state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
put_rpccred(cred); put_rpccred(cred);
if (IS_ERR(state)) if (IS_ERR(state))
return (struct inode *)state; return (struct inode *)state;
...@@ -802,7 +791,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) ...@@ -802,7 +791,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags)
cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
state = nfs4_open_delegated(dentry->d_inode, openflags, cred); state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
if (IS_ERR(state)) if (IS_ERR(state))
state = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred); state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
put_rpccred(cred); put_rpccred(cred);
if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0)
return 1; return 1;
...@@ -1026,7 +1015,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, ...@@ -1026,7 +1015,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
FMODE_WRITE, cred); FMODE_WRITE, cred);
if (IS_ERR(state)) if (IS_ERR(state))
state = nfs4_do_open(dentry->d_parent->d_inode, state = nfs4_do_open(dentry->d_parent->d_inode,
&dentry->d_name, FMODE_WRITE, dentry, FMODE_WRITE,
NULL, cred); NULL, cred);
need_iput = 1; need_iput = 1;
} }
...@@ -1327,7 +1316,7 @@ static int nfs4_proc_commit(struct nfs_write_data *cdata) ...@@ -1327,7 +1316,7 @@ static int nfs4_proc_commit(struct nfs_write_data *cdata)
*/ */
static struct inode * static struct inode *
nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr, nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags) int flags)
{ {
struct inode *inode; struct inode *inode;
...@@ -1335,7 +1324,7 @@ nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr, ...@@ -1335,7 +1324,7 @@ nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
struct rpc_cred *cred; struct rpc_cred *cred;
cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
state = nfs4_do_open(dir, name, flags, sattr, cred); state = nfs4_do_open(dir, dentry, flags, sattr, cred);
put_rpccred(cred); put_rpccred(cred);
if (!IS_ERR(state)) { if (!IS_ERR(state)) {
inode = state->inode; inode = state->inode;
......
...@@ -445,7 +445,7 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner) ...@@ -445,7 +445,7 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
state->owner = owner; state->owner = owner;
atomic_inc(&owner->so_count); atomic_inc(&owner->so_count);
list_add(&state->inode_states, &nfsi->open_states); list_add(&state->inode_states, &nfsi->open_states);
state->inode = inode; state->inode = igrab(inode);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
} else { } else {
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
...@@ -471,6 +471,7 @@ void nfs4_put_open_state(struct nfs4_state *state) ...@@ -471,6 +471,7 @@ void nfs4_put_open_state(struct nfs4_state *state)
list_del(&state->inode_states); list_del(&state->inode_states);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
list_del(&state->open_states); list_del(&state->open_states);
iput(inode);
BUG_ON (state->state != 0); BUG_ON (state->state != 0);
nfs4_free_open_state(state); nfs4_free_open_state(state);
nfs4_put_state_owner(owner); nfs4_put_state_owner(owner);
...@@ -486,7 +487,6 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) ...@@ -486,7 +487,6 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
struct nfs4_state_owner *owner = state->owner; struct nfs4_state_owner *owner = state->owner;
struct nfs4_client *clp = owner->so_client; struct nfs4_client *clp = owner->so_client;
int newstate; int newstate;
int status = 0;
atomic_inc(&owner->so_count); atomic_inc(&owner->so_count);
down_read(&clp->cl_sem); down_read(&clp->cl_sem);
...@@ -508,10 +508,8 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode) ...@@ -508,10 +508,8 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
newstate |= FMODE_WRITE; newstate |= FMODE_WRITE;
if (state->state == newstate) if (state->state == newstate)
goto out; goto out;
if (newstate != 0) if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS)
status = nfs4_do_downgrade(inode, state, newstate); return;
else
status = nfs4_do_close(inode, state);
} }
out: out:
nfs4_put_open_state(state); nfs4_put_open_state(state);
......
...@@ -63,12 +63,12 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, ...@@ -63,12 +63,12 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
dprintk("%s: call getattr\n", __FUNCTION__); dprintk("%s: call getattr\n", __FUNCTION__);
fattr->valid = 0; fattr->valid = 0;
status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0); status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
dprintk("%s: reply getattr %d\n", __FUNCTION__, status); dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
if (status) if (status)
return status; return status;
dprintk("%s: call statfs\n", __FUNCTION__); dprintk("%s: call statfs\n", __FUNCTION__);
status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0); status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
dprintk("%s: reply statfs %d\n", __FUNCTION__, status); dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
if (status) if (status)
return status; return status;
info->rtmax = NFS_MAXDATA; info->rtmax = NFS_MAXDATA;
...@@ -96,7 +96,7 @@ nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, ...@@ -96,7 +96,7 @@ nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
fattr->valid = 0; fattr->valid = 0;
status = rpc_call(server->client, NFSPROC_GETATTR, status = rpc_call(server->client, NFSPROC_GETATTR,
fhandle, fattr, 0); fhandle, fattr, 0);
dprintk("NFS reply getattr\n"); dprintk("NFS reply getattr: %d\n", status);
return status; return status;
} }
...@@ -114,7 +114,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, ...@@ -114,7 +114,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
dprintk("NFS call setattr\n"); dprintk("NFS call setattr\n");
fattr->valid = 0; fattr->valid = 0;
status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0); status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
dprintk("NFS reply setattr\n"); dprintk("NFS reply setattr: %d\n", status);
return status; return status;
} }
...@@ -213,15 +213,15 @@ static int nfs_proc_write(struct nfs_write_data *wdata) ...@@ -213,15 +213,15 @@ static int nfs_proc_write(struct nfs_write_data *wdata)
} }
static struct inode * static struct inode *
nfs_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr, nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags) int flags)
{ {
struct nfs_fh fhandle; struct nfs_fh fhandle;
struct nfs_fattr fattr; struct nfs_fattr fattr;
struct nfs_createargs arg = { struct nfs_createargs arg = {
.fh = NFS_FH(dir), .fh = NFS_FH(dir),
.name = name->name, .name = dentry->d_name.name,
.len = name->len, .len = dentry->d_name.len,
.sattr = sattr .sattr = sattr
}; };
struct nfs_diropok res = { struct nfs_diropok res = {
...@@ -231,7 +231,7 @@ nfs_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr, ...@@ -231,7 +231,7 @@ nfs_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
int status; int status;
fattr.valid = 0; fattr.valid = 0;
dprintk("NFS call create %s\n", name->name); dprintk("NFS call create %s\n", dentry->d_name.name);
status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
dprintk("NFS reply create: %d\n", status); dprintk("NFS reply create: %d\n", status);
if (status == 0) { if (status == 0) {
......
...@@ -24,7 +24,6 @@ ...@@ -24,7 +24,6 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/mempool.h>
#include <linux/sunrpc/clnt.h> #include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h> #include <linux/nfs_fs.h>
#include <linux/nfs_page.h> #include <linux/nfs_page.h>
...@@ -39,25 +38,11 @@ static void nfs_readpage_result_partial(struct nfs_read_data *, int); ...@@ -39,25 +38,11 @@ static void nfs_readpage_result_partial(struct nfs_read_data *, int);
static void nfs_readpage_result_full(struct nfs_read_data *, int); static void nfs_readpage_result_full(struct nfs_read_data *, int);
static kmem_cache_t *nfs_rdata_cachep; static kmem_cache_t *nfs_rdata_cachep;
static mempool_t *nfs_rdata_mempool; mempool_t *nfs_rdata_mempool;
#define MIN_POOL_READ (32) #define MIN_POOL_READ (32)
static struct nfs_read_data *nfs_readdata_alloc(void) void nfs_readdata_release(struct rpc_task *task)
{
struct nfs_read_data *p;
p = (struct nfs_read_data *)mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
if (p)
memset(p, 0, sizeof(*p));
return p;
}
static __inline__ void nfs_readdata_free(struct nfs_read_data *p)
{
mempool_free(p, nfs_rdata_mempool);
}
static void nfs_readdata_release(struct rpc_task *task)
{ {
struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
nfs_readdata_free(data); nfs_readdata_free(data);
......
...@@ -215,7 +215,6 @@ nfs_complete_unlink(struct dentry *dentry) ...@@ -215,7 +215,6 @@ nfs_complete_unlink(struct dentry *dentry)
spin_lock(&dentry->d_lock); spin_lock(&dentry->d_lock);
dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
spin_unlock(&dentry->d_lock); spin_unlock(&dentry->d_lock);
if (data->task.tk_rpcwait == &nfs_delete_queue) rpc_wake_up_task(&data->task);
rpc_wake_up_task(&data->task);
nfs_put_unlinkdata(data); nfs_put_unlinkdata(data);
} }
...@@ -61,7 +61,6 @@ ...@@ -61,7 +61,6 @@
#include <linux/nfs_page.h> #include <linux/nfs_page.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <linux/mempool.h>
#include "delegation.h" #include "delegation.h"
...@@ -83,49 +82,17 @@ static int nfs_wait_on_write_congestion(struct address_space *, int); ...@@ -83,49 +82,17 @@ static int nfs_wait_on_write_congestion(struct address_space *, int);
static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
static kmem_cache_t *nfs_wdata_cachep; static kmem_cache_t *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool; mempool_t *nfs_wdata_mempool;
static mempool_t *nfs_commit_mempool; mempool_t *nfs_commit_mempool;
static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
static __inline__ struct nfs_write_data *nfs_writedata_alloc(void) void nfs_writedata_release(struct rpc_task *task)
{
struct nfs_write_data *p;
p = (struct nfs_write_data *)mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
}
return p;
}
static __inline__ void nfs_writedata_free(struct nfs_write_data *p)
{
mempool_free(p, nfs_wdata_mempool);
}
static void nfs_writedata_release(struct rpc_task *task)
{ {
struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata; struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata;
nfs_writedata_free(wdata); nfs_writedata_free(wdata);
} }
static __inline__ struct nfs_write_data *nfs_commit_alloc(void)
{
struct nfs_write_data *p;
p = (struct nfs_write_data *)mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
}
return p;
}
static __inline__ void nfs_commit_free(struct nfs_write_data *p)
{
mempool_free(p, nfs_commit_mempool);
}
/* Adjust the file length if we're writing beyond the end */ /* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{ {
...@@ -184,11 +151,10 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, ...@@ -184,11 +151,10 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
int result, written = 0; int result, written = 0;
struct nfs_write_data *wdata; struct nfs_write_data *wdata;
wdata = kmalloc(sizeof(*wdata), GFP_NOFS); wdata = nfs_writedata_alloc();
if (!wdata) if (!wdata)
return -ENOMEM; return -ENOMEM;
memset(wdata, 0, sizeof(*wdata));
wdata->flags = how; wdata->flags = how;
wdata->cred = ctx->cred; wdata->cred = ctx->cred;
wdata->inode = inode; wdata->inode = inode;
...@@ -238,8 +204,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, ...@@ -238,8 +204,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
io_error: io_error:
nfs_end_data_update_defer(inode); nfs_end_data_update_defer(inode);
nfs_writedata_free(wdata);
kfree(wdata);
return written ? written : result; return written ? written : result;
} }
...@@ -1199,7 +1164,8 @@ void nfs_writeback_done(struct rpc_task *task) ...@@ -1199,7 +1164,8 @@ void nfs_writeback_done(struct rpc_task *task)
} }
if (time_before(complain, jiffies)) { if (time_before(complain, jiffies)) {
printk(KERN_WARNING printk(KERN_WARNING
"NFS: Server wrote less than requested.\n"); "NFS: Server wrote zero bytes, expected %u.\n",
argp->count);
complain = jiffies + 300 * HZ; complain = jiffies + 300 * HZ;
} }
/* Can't do anything about it except throw an error. */ /* Can't do anything about it except throw an error. */
......
...@@ -199,6 +199,7 @@ static inline int dname_external(struct dentry *dentry) ...@@ -199,6 +199,7 @@ static inline int dname_external(struct dentry *dentry)
* These are the low-level FS interfaces to the dcache.. * These are the low-level FS interfaces to the dcache..
*/ */
extern void d_instantiate(struct dentry *, struct inode *); extern void d_instantiate(struct dentry *, struct inode *);
extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
extern void d_delete(struct dentry *); extern void d_delete(struct dentry *);
/* allocate/de-allocate */ /* allocate/de-allocate */
...@@ -242,6 +243,23 @@ static inline void d_add(struct dentry *entry, struct inode *inode) ...@@ -242,6 +243,23 @@ static inline void d_add(struct dentry *entry, struct inode *inode)
d_rehash(entry); d_rehash(entry);
} }
/**
* d_add_unique - add dentry to hash queues without aliasing
* @entry: dentry to add
* @inode: The inode to attach to this dentry
*
* This adds the entry to the hash queues and initializes @inode.
* The entry was actually filled in earlier during d_alloc().
*/
static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *inode)
{
struct dentry *res;
res = d_instantiate_unique(entry, inode);
d_rehash(res != NULL ? res : entry);
return res;
}
/* used for rename() and baskets */ /* used for rename() and baskets */
extern void d_move(struct dentry *, struct dentry *); extern void d_move(struct dentry *, struct dentry *);
......
...@@ -1189,11 +1189,6 @@ extern long do_mount(char *, char *, char *, unsigned long, void *); ...@@ -1189,11 +1189,6 @@ extern long do_mount(char *, char *, char *, unsigned long, void *);
extern int vfs_statfs(struct super_block *, struct kstatfs *); extern int vfs_statfs(struct super_block *, struct kstatfs *);
/* Return value for VFS lock functions - tells locks.c to lock conventionally
* REALLY kosha for root NFS and nfs_lock
*/
#define LOCK_USE_CLNT 1
#define FLOCK_VERIFY_READ 1 #define FLOCK_VERIFY_READ 1
#define FLOCK_VERIFY_WRITE 2 #define FLOCK_VERIFY_WRITE 2
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <linux/nfs_xdr.h> #include <linux/nfs_xdr.h>
#include <linux/rwsem.h> #include <linux/rwsem.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/mempool.h>
/* /*
* Enable debugging support for nfs client. * Enable debugging support for nfs client.
...@@ -201,6 +202,7 @@ struct nfs_inode { ...@@ -201,6 +202,7 @@ struct nfs_inode {
#define NFS_INO_INVALID_ATTR 0x0008 /* cached attrs are invalid */ #define NFS_INO_INVALID_ATTR 0x0008 /* cached attrs are invalid */
#define NFS_INO_INVALID_DATA 0x0010 /* cached data is invalid */ #define NFS_INO_INVALID_DATA 0x0010 /* cached data is invalid */
#define NFS_INO_INVALID_ATIME 0x0020 /* cached atime is invalid */ #define NFS_INO_INVALID_ATIME 0x0020 /* cached atime is invalid */
#define NFS_INO_INVALID_ACCESS 0x0040 /* cached access cred invalid */
static inline struct nfs_inode *NFS_I(struct inode *inode) static inline struct nfs_inode *NFS_I(struct inode *inode)
{ {
...@@ -239,7 +241,7 @@ static inline int nfs_caches_unstable(struct inode *inode) ...@@ -239,7 +241,7 @@ static inline int nfs_caches_unstable(struct inode *inode)
static inline void NFS_CACHEINV(struct inode *inode) static inline void NFS_CACHEINV(struct inode *inode)
{ {
if (!nfs_caches_unstable(inode)) if (!nfs_caches_unstable(inode))
NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR; NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
} }
static inline int nfs_server_capable(struct inode *inode, int cap) static inline int nfs_server_capable(struct inode *inode, int cap)
...@@ -424,6 +426,44 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page) ...@@ -424,6 +426,44 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page)
return nfs_wb_page_priority(inode, page, 0); return nfs_wb_page_priority(inode, page, 0);
} }
/*
* Allocate and free nfs_write_data structures
*/
extern mempool_t *nfs_wdata_mempool;
extern mempool_t *nfs_commit_mempool;
static inline struct nfs_write_data *nfs_writedata_alloc(void)
{
struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
}
return p;
}
static inline void nfs_writedata_free(struct nfs_write_data *p)
{
mempool_free(p, nfs_wdata_mempool);
}
extern void nfs_writedata_release(struct rpc_task *task);
static inline struct nfs_write_data *nfs_commit_alloc(void)
{
struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
}
return p;
}
static inline void nfs_commit_free(struct nfs_write_data *p)
{
mempool_free(p, nfs_commit_mempool);
}
/* Hack for future NFS swap support */ /* Hack for future NFS swap support */
#ifndef IS_SWAPFILE #ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode) (0) # define IS_SWAPFILE(inode) (0)
...@@ -438,6 +478,26 @@ extern int nfs_readpages(struct file *, struct address_space *, ...@@ -438,6 +478,26 @@ extern int nfs_readpages(struct file *, struct address_space *,
extern int nfs_pagein_list(struct list_head *, int); extern int nfs_pagein_list(struct list_head *, int);
extern void nfs_readpage_result(struct rpc_task *); extern void nfs_readpage_result(struct rpc_task *);
/*
* Allocate and free nfs_read_data structures
*/
extern mempool_t *nfs_rdata_mempool;
static inline struct nfs_read_data *nfs_readdata_alloc(void)
{
struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
if (p)
memset(p, 0, sizeof(*p));
return p;
}
static inline void nfs_readdata_free(struct nfs_read_data *p)
{
mempool_free(p, nfs_rdata_mempool);
}
extern void nfs_readdata_release(struct rpc_task *task);
/* /*
* linux/fs/mount_clnt.c * linux/fs/mount_clnt.c
* (Used only by nfsroot module) * (Used only by nfsroot module)
...@@ -651,8 +711,7 @@ extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); ...@@ -651,8 +711,7 @@ extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *); extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *);
extern int nfs4_proc_async_renew(struct nfs4_client *); extern int nfs4_proc_async_renew(struct nfs4_client *);
extern int nfs4_proc_renew(struct nfs4_client *); extern int nfs4_proc_renew(struct nfs4_client *);
extern int nfs4_do_close(struct inode *, struct nfs4_state *); extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
extern int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode);
extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *); extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *);
extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
......
...@@ -681,7 +681,7 @@ struct nfs_rpc_ops { ...@@ -681,7 +681,7 @@ struct nfs_rpc_ops {
int (*read) (struct nfs_read_data *); int (*read) (struct nfs_read_data *);
int (*write) (struct nfs_write_data *); int (*write) (struct nfs_write_data *);
int (*commit) (struct nfs_write_data *); int (*commit) (struct nfs_write_data *);
struct inode * (*create) (struct inode *, struct qstr *, struct inode * (*create) (struct inode *, struct dentry *,
struct iattr *, int); struct iattr *, int);
int (*remove) (struct inode *, struct qstr *); int (*remove) (struct inode *, struct qstr *);
int (*unlink_setup) (struct rpc_message *, int (*unlink_setup) (struct rpc_message *,
......
...@@ -51,7 +51,6 @@ struct rpc_cred { ...@@ -51,7 +51,6 @@ struct rpc_cred {
}; };
#define RPCAUTH_CRED_LOCKED 0x0001 #define RPCAUTH_CRED_LOCKED 0x0001
#define RPCAUTH_CRED_UPTODATE 0x0002 #define RPCAUTH_CRED_UPTODATE 0x0002
#define RPCAUTH_CRED_DEAD 0x0004
#define RPCAUTH_CRED_MAGIC 0x0f4aa4f0 #define RPCAUTH_CRED_MAGIC 0x0f4aa4f0
...@@ -131,7 +130,6 @@ int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, ...@@ -131,7 +130,6 @@ int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
int rpcauth_refreshcred(struct rpc_task *); int rpcauth_refreshcred(struct rpc_task *);
void rpcauth_invalcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *);
int rpcauth_uptodatecred(struct rpc_task *); int rpcauth_uptodatecred(struct rpc_task *);
int rpcauth_deadcred(struct rpc_task *);
void rpcauth_init_credcache(struct rpc_auth *); void rpcauth_init_credcache(struct rpc_auth *);
void rpcauth_free_credcache(struct rpc_auth *); void rpcauth_free_credcache(struct rpc_auth *);
......
...@@ -11,7 +11,9 @@ ...@@ -11,7 +11,9 @@
#include <linux/timer.h> #include <linux/timer.h>
#include <linux/sunrpc/types.h> #include <linux/sunrpc/types.h>
#include <linux/spinlock.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/workqueue.h>
#include <linux/sunrpc/xdr.h> #include <linux/sunrpc/xdr.h>
/* /*
...@@ -25,11 +27,18 @@ struct rpc_message { ...@@ -25,11 +27,18 @@ struct rpc_message {
struct rpc_cred * rpc_cred; /* Credentials */ struct rpc_cred * rpc_cred; /* Credentials */
}; };
struct rpc_wait_queue;
struct rpc_wait {
struct list_head list; /* wait queue links */
struct list_head links; /* Links to related tasks */
wait_queue_head_t waitq; /* sync: sleep on this q */
struct rpc_wait_queue * rpc_waitq; /* RPC wait queue we're on */
};
/* /*
* This is the RPC task struct * This is the RPC task struct
*/ */
struct rpc_task { struct rpc_task {
struct list_head tk_list; /* wait queue links */
#ifdef RPC_DEBUG #ifdef RPC_DEBUG
unsigned long tk_magic; /* 0xf00baa */ unsigned long tk_magic; /* 0xf00baa */
#endif #endif
...@@ -37,7 +46,6 @@ struct rpc_task { ...@@ -37,7 +46,6 @@ struct rpc_task {
struct rpc_clnt * tk_client; /* RPC client */ struct rpc_clnt * tk_client; /* RPC client */
struct rpc_rqst * tk_rqstp; /* RPC request */ struct rpc_rqst * tk_rqstp; /* RPC request */
int tk_status; /* result of last operation */ int tk_status; /* result of last operation */
struct rpc_wait_queue * tk_rpcwait; /* RPC wait queue we're on */
/* /*
* RPC call state * RPC call state
...@@ -70,13 +78,18 @@ struct rpc_task { ...@@ -70,13 +78,18 @@ struct rpc_task {
* you have a pathological interest in kernel oopses. * you have a pathological interest in kernel oopses.
*/ */
struct timer_list tk_timer; /* kernel timer */ struct timer_list tk_timer; /* kernel timer */
wait_queue_head_t tk_wait; /* sync: sleep on this q */
unsigned long tk_timeout; /* timeout for rpc_sleep() */ unsigned long tk_timeout; /* timeout for rpc_sleep() */
unsigned short tk_flags; /* misc flags */ unsigned short tk_flags; /* misc flags */
unsigned char tk_active : 1;/* Task has been activated */ unsigned char tk_active : 1;/* Task has been activated */
unsigned char tk_priority : 2;/* Task priority */ unsigned char tk_priority : 2;/* Task priority */
unsigned long tk_runstate; /* Task run status */ unsigned long tk_runstate; /* Task run status */
struct list_head tk_links; /* links to related tasks */ struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could
* be any workqueue
*/
union {
struct work_struct tk_work; /* Async task work queue */
struct rpc_wait tk_wait; /* RPC wait */
} u;
#ifdef RPC_DEBUG #ifdef RPC_DEBUG
unsigned short tk_pid; /* debugging aid */ unsigned short tk_pid; /* debugging aid */
#endif #endif
...@@ -87,11 +100,11 @@ struct rpc_task { ...@@ -87,11 +100,11 @@ struct rpc_task {
/* support walking a list of tasks on a wait queue */ /* support walking a list of tasks on a wait queue */
#define task_for_each(task, pos, head) \ #define task_for_each(task, pos, head) \
list_for_each(pos, head) \ list_for_each(pos, head) \
if ((task=list_entry(pos, struct rpc_task, tk_list)),1) if ((task=list_entry(pos, struct rpc_task, u.tk_wait.list)),1)
#define task_for_first(task, head) \ #define task_for_first(task, head) \
if (!list_empty(head) && \ if (!list_empty(head) && \
((task=list_entry((head)->next, struct rpc_task, tk_list)),1)) ((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1))
/* .. and walking list of all tasks */ /* .. and walking list of all tasks */
#define alltask_for_each(task, pos, head) \ #define alltask_for_each(task, pos, head) \
...@@ -126,22 +139,39 @@ typedef void (*rpc_action)(struct rpc_task *); ...@@ -126,22 +139,39 @@ typedef void (*rpc_action)(struct rpc_task *);
#define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) #define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT)
#define RPC_TASK_UNINTERRUPTIBLE(t) ((t)->tk_flags & RPC_TASK_NOINTR) #define RPC_TASK_UNINTERRUPTIBLE(t) ((t)->tk_flags & RPC_TASK_NOINTR)
#define RPC_TASK_SLEEPING 0 #define RPC_TASK_RUNNING 0
#define RPC_TASK_RUNNING 1 #define RPC_TASK_QUEUED 1
#define RPC_IS_SLEEPING(t) (test_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate)) #define RPC_TASK_WAKEUP 2
#define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) #define RPC_TASK_HAS_TIMER 3
#define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
#define rpc_set_running(t) (set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) #define rpc_set_running(t) (set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
#define rpc_clear_running(t) (clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) #define rpc_test_and_set_running(t) \
(test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
#define rpc_clear_running(t) \
do { \
smp_mb__before_clear_bit(); \
clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \
smp_mb__after_clear_bit(); \
} while (0)
#define rpc_set_sleeping(t) (set_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate)) #define RPC_IS_QUEUED(t) (test_bit(RPC_TASK_QUEUED, &(t)->tk_runstate))
#define rpc_set_queued(t) (set_bit(RPC_TASK_QUEUED, &(t)->tk_runstate))
#define rpc_clear_queued(t) \
do { \
smp_mb__before_clear_bit(); \
clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate); \
smp_mb__after_clear_bit(); \
} while (0)
#define rpc_clear_sleeping(t) \ #define rpc_start_wakeup(t) \
(test_and_set_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate) == 0)
#define rpc_finish_wakeup(t) \
do { \ do { \
smp_mb__before_clear_bit(); \ smp_mb__before_clear_bit(); \
clear_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate); \ clear_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate); \
smp_mb__after_clear_bit(); \ smp_mb__after_clear_bit(); \
} while(0) } while (0)
/* /*
* Task priorities. * Task priorities.
...@@ -157,6 +187,7 @@ typedef void (*rpc_action)(struct rpc_task *); ...@@ -157,6 +187,7 @@ typedef void (*rpc_action)(struct rpc_task *);
* RPC synchronization objects * RPC synchronization objects
*/ */
struct rpc_wait_queue { struct rpc_wait_queue {
spinlock_t lock;
struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */ struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */
unsigned long cookie; /* cookie of last task serviced */ unsigned long cookie; /* cookie of last task serviced */
unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */
...@@ -177,6 +208,7 @@ struct rpc_wait_queue { ...@@ -177,6 +208,7 @@ struct rpc_wait_queue {
#ifndef RPC_DEBUG #ifndef RPC_DEBUG
# define RPC_WAITQ_INIT(var,qname) { \ # define RPC_WAITQ_INIT(var,qname) { \
.lock = SPIN_LOCK_UNLOCKED, \
.tasks = { \ .tasks = { \
[0] = LIST_HEAD_INIT(var.tasks[0]), \ [0] = LIST_HEAD_INIT(var.tasks[0]), \
[1] = LIST_HEAD_INIT(var.tasks[1]), \ [1] = LIST_HEAD_INIT(var.tasks[1]), \
...@@ -185,6 +217,7 @@ struct rpc_wait_queue { ...@@ -185,6 +217,7 @@ struct rpc_wait_queue {
} }
#else #else
# define RPC_WAITQ_INIT(var,qname) { \ # define RPC_WAITQ_INIT(var,qname) { \
.lock = SPIN_LOCK_UNLOCKED, \
.tasks = { \ .tasks = { \
[0] = LIST_HEAD_INIT(var.tasks[0]), \ [0] = LIST_HEAD_INIT(var.tasks[0]), \
[1] = LIST_HEAD_INIT(var.tasks[1]), \ [1] = LIST_HEAD_INIT(var.tasks[1]), \
...@@ -209,13 +242,10 @@ void rpc_killall_tasks(struct rpc_clnt *); ...@@ -209,13 +242,10 @@ void rpc_killall_tasks(struct rpc_clnt *);
int rpc_execute(struct rpc_task *); int rpc_execute(struct rpc_task *);
void rpc_run_child(struct rpc_task *parent, struct rpc_task *child, void rpc_run_child(struct rpc_task *parent, struct rpc_task *child,
rpc_action action); rpc_action action);
int rpc_add_wait_queue(struct rpc_wait_queue *, struct rpc_task *);
void rpc_remove_wait_queue(struct rpc_task *);
void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
rpc_action action, rpc_action timer); rpc_action action, rpc_action timer);
void rpc_add_timer(struct rpc_task *, rpc_action);
void rpc_wake_up_task(struct rpc_task *); void rpc_wake_up_task(struct rpc_task *);
void rpc_wake_up(struct rpc_wait_queue *); void rpc_wake_up(struct rpc_wait_queue *);
struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
......
...@@ -214,8 +214,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, ...@@ -214,8 +214,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
list_for_each_safe(pos, next, &auth->au_credcache[nr]) { list_for_each_safe(pos, next, &auth->au_credcache[nr]) {
struct rpc_cred *entry; struct rpc_cred *entry;
entry = list_entry(pos, struct rpc_cred, cr_hash); entry = list_entry(pos, struct rpc_cred, cr_hash);
if (entry->cr_flags & RPCAUTH_CRED_DEAD)
continue;
if (rpcauth_prune_expired(entry, &free)) if (rpcauth_prune_expired(entry, &free))
continue; continue;
if (entry->cr_ops->crmatch(acred, entry, taskflags)) { if (entry->cr_ops->crmatch(acred, entry, taskflags)) {
...@@ -307,9 +305,6 @@ put_rpccred(struct rpc_cred *cred) ...@@ -307,9 +305,6 @@ put_rpccred(struct rpc_cred *cred)
if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock)) if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
return; return;
if ((cred->cr_flags & RPCAUTH_CRED_DEAD) && !list_empty(&cred->cr_hash))
list_del_init(&cred->cr_hash);
if (list_empty(&cred->cr_hash)) { if (list_empty(&cred->cr_hash)) {
spin_unlock(&rpc_credcache_lock); spin_unlock(&rpc_credcache_lock);
rpcauth_crdestroy(cred); rpcauth_crdestroy(cred);
...@@ -413,10 +408,3 @@ rpcauth_uptodatecred(struct rpc_task *task) ...@@ -413,10 +408,3 @@ rpcauth_uptodatecred(struct rpc_task *task)
return !(task->tk_msg.rpc_cred) || return !(task->tk_msg.rpc_cred) ||
(task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE); (task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE);
} }
int
rpcauth_deadcred(struct rpc_task *task)
{
return !(task->tk_msg.rpc_cred) ||
(task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_DEAD);
}
...@@ -480,12 +480,14 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) ...@@ -480,12 +480,14 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (!cred) if (!cred)
goto err; goto err;
if (gss_err) if (gss_err)
cred->cr_flags |= RPCAUTH_CRED_DEAD; cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
else else
gss_cred_set_ctx(cred, ctx); gss_cred_set_ctx(cred, ctx);
spin_lock(&gss_auth->lock); spin_lock(&gss_auth->lock);
gss_msg = __gss_find_upcall(gss_auth, acred.uid); gss_msg = __gss_find_upcall(gss_auth, acred.uid);
if (gss_msg) { if (gss_msg) {
if (gss_err)
gss_msg->msg.errno = -EACCES;
__gss_unhash_msg(gss_msg); __gss_unhash_msg(gss_msg);
spin_unlock(&gss_auth->lock); spin_unlock(&gss_auth->lock);
gss_release_msg(gss_msg); gss_release_msg(gss_msg);
...@@ -740,7 +742,9 @@ gss_marshal(struct rpc_task *task, u32 *p, int ruid) ...@@ -740,7 +742,9 @@ gss_marshal(struct rpc_task *task, u32 *p, int ruid)
maj_stat = gss_get_mic(ctx->gc_gss_ctx, maj_stat = gss_get_mic(ctx->gc_gss_ctx,
GSS_C_QOP_DEFAULT, GSS_C_QOP_DEFAULT,
&verf_buf, &mic); &verf_buf, &mic);
if(maj_stat != 0){ if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
} else if (maj_stat != 0) {
printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
goto out_put_ctx; goto out_put_ctx;
} }
...@@ -779,6 +783,7 @@ gss_validate(struct rpc_task *task, u32 *p) ...@@ -779,6 +783,7 @@ gss_validate(struct rpc_task *task, u32 *p)
struct xdr_netobj mic; struct xdr_netobj mic;
u32 flav,len; u32 flav,len;
u32 service; u32 service;
u32 maj_stat;
dprintk("RPC: %4u gss_validate\n", task->tk_pid); dprintk("RPC: %4u gss_validate\n", task->tk_pid);
...@@ -794,8 +799,11 @@ gss_validate(struct rpc_task *task, u32 *p) ...@@ -794,8 +799,11 @@ gss_validate(struct rpc_task *task, u32 *p)
mic.data = (u8 *)p; mic.data = (u8 *)p;
mic.len = len; mic.len = len;
if (gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state)) maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state);
goto out_bad; if (maj_stat == GSS_S_CONTEXT_EXPIRED)
cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
if (maj_stat)
goto out_bad;
service = gss_pseudoflavor_to_service(ctx->gc_gss_ctx->mech_type, service = gss_pseudoflavor_to_service(ctx->gc_gss_ctx->mech_type,
gss_cred->gc_flavor); gss_cred->gc_flavor);
switch (service) { switch (service) {
...@@ -821,11 +829,10 @@ gss_validate(struct rpc_task *task, u32 *p) ...@@ -821,11 +829,10 @@ gss_validate(struct rpc_task *task, u32 *p)
} }
static inline int static inline int
gss_wrap_req_integ(struct gss_cl_ctx *ctx, gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
kxdrproc_t encode, void *rqstp, u32 *p, void *obj) kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
{ {
struct rpc_rqst *req = (struct rpc_rqst *)rqstp; struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
struct xdr_buf *snd_buf = &req->rq_snd_buf;
struct xdr_buf integ_buf; struct xdr_buf integ_buf;
u32 *integ_len = NULL; u32 *integ_len = NULL;
struct xdr_netobj mic; struct xdr_netobj mic;
...@@ -836,7 +843,7 @@ gss_wrap_req_integ(struct gss_cl_ctx *ctx, ...@@ -836,7 +843,7 @@ gss_wrap_req_integ(struct gss_cl_ctx *ctx,
integ_len = p++; integ_len = p++;
offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
*p++ = htonl(req->rq_seqno); *p++ = htonl(rqstp->rq_seqno);
status = encode(rqstp, p, obj); status = encode(rqstp, p, obj);
if (status) if (status)
...@@ -858,7 +865,9 @@ gss_wrap_req_integ(struct gss_cl_ctx *ctx, ...@@ -858,7 +865,9 @@ gss_wrap_req_integ(struct gss_cl_ctx *ctx,
maj_stat = gss_get_mic(ctx->gc_gss_ctx, maj_stat = gss_get_mic(ctx->gc_gss_ctx,
GSS_C_QOP_DEFAULT, &integ_buf, &mic); GSS_C_QOP_DEFAULT, &integ_buf, &mic);
status = -EIO; /* XXX? */ status = -EIO; /* XXX? */
if (maj_stat) if (maj_stat == GSS_S_CONTEXT_EXPIRED)
cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
else if (maj_stat)
return status; return status;
q = xdr_encode_opaque(p, NULL, mic.len); q = xdr_encode_opaque(p, NULL, mic.len);
...@@ -894,7 +903,8 @@ gss_wrap_req(struct rpc_task *task, ...@@ -894,7 +903,8 @@ gss_wrap_req(struct rpc_task *task,
status = encode(rqstp, p, obj); status = encode(rqstp, p, obj);
goto out; goto out;
case RPC_GSS_SVC_INTEGRITY: case RPC_GSS_SVC_INTEGRITY:
status = gss_wrap_req_integ(ctx, encode, rqstp, p, obj); status = gss_wrap_req_integ(cred, ctx, encode,
rqstp, p, obj);
goto out; goto out;
case RPC_GSS_SVC_PRIVACY: case RPC_GSS_SVC_PRIVACY:
default: default:
...@@ -907,11 +917,10 @@ gss_wrap_req(struct rpc_task *task, ...@@ -907,11 +917,10 @@ gss_wrap_req(struct rpc_task *task,
} }
static inline int static inline int
gss_unwrap_resp_integ(struct gss_cl_ctx *ctx, gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
kxdrproc_t decode, void *rqstp, u32 **p, void *obj) struct rpc_rqst *rqstp, u32 **p)
{ {
struct rpc_rqst *req = (struct rpc_rqst *)rqstp; struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
struct xdr_buf *rcv_buf = &req->rq_rcv_buf;
struct xdr_buf integ_buf; struct xdr_buf integ_buf;
struct xdr_netobj mic; struct xdr_netobj mic;
u32 data_offset, mic_offset; u32 data_offset, mic_offset;
...@@ -926,7 +935,7 @@ gss_unwrap_resp_integ(struct gss_cl_ctx *ctx, ...@@ -926,7 +935,7 @@ gss_unwrap_resp_integ(struct gss_cl_ctx *ctx,
mic_offset = integ_len + data_offset; mic_offset = integ_len + data_offset;
if (mic_offset > rcv_buf->len) if (mic_offset > rcv_buf->len)
return status; return status;
if (ntohl(*(*p)++) != req->rq_seqno) if (ntohl(*(*p)++) != rqstp->rq_seqno)
return status; return status;
if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset,
...@@ -938,6 +947,8 @@ gss_unwrap_resp_integ(struct gss_cl_ctx *ctx, ...@@ -938,6 +947,8 @@ gss_unwrap_resp_integ(struct gss_cl_ctx *ctx,
maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf,
&mic, NULL); &mic, NULL);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
if (maj_stat != GSS_S_COMPLETE) if (maj_stat != GSS_S_COMPLETE)
return status; return status;
return 0; return 0;
...@@ -962,8 +973,7 @@ gss_unwrap_resp(struct rpc_task *task, ...@@ -962,8 +973,7 @@ gss_unwrap_resp(struct rpc_task *task,
case RPC_GSS_SVC_NONE: case RPC_GSS_SVC_NONE:
goto out_decode; goto out_decode;
case RPC_GSS_SVC_INTEGRITY: case RPC_GSS_SVC_INTEGRITY:
status = gss_unwrap_resp_integ(ctx, decode, status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p);
rqstp, &p, obj);
if (status) if (status)
goto out; goto out;
break; break;
......
...@@ -928,7 +928,7 @@ call_refreshresult(struct rpc_task *task) ...@@ -928,7 +928,7 @@ call_refreshresult(struct rpc_task *task)
task->tk_action = call_reserve; task->tk_action = call_reserve;
if (status >= 0 && rpcauth_uptodatecred(task)) if (status >= 0 && rpcauth_uptodatecred(task))
return; return;
if (rpcauth_deadcred(task)) { if (status == -EACCES) {
rpc_exit(task, -EACCES); rpc_exit(task, -EACCES);
return; return;
} }
...@@ -970,23 +970,31 @@ call_verify(struct rpc_task *task) ...@@ -970,23 +970,31 @@ call_verify(struct rpc_task *task)
struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0]; struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
int len = task->tk_rqstp->rq_rcv_buf.len >> 2; int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
u32 *p = iov->iov_base, n; u32 *p = iov->iov_base, n;
int error = -EACCES;
if ((len -= 3) < 0) if ((len -= 3) < 0)
goto garbage; goto out_overflow;
p += 1; /* skip XID */ p += 1; /* skip XID */
if ((n = ntohl(*p++)) != RPC_REPLY) { if ((n = ntohl(*p++)) != RPC_REPLY) {
printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n); printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n);
goto garbage; goto out_retry;
} }
if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
int error = -EACCES;
if (--len < 0) if (--len < 0)
goto garbage; goto out_overflow;
if ((n = ntohl(*p++)) != RPC_AUTH_ERROR) { switch ((n = ntohl(*p++))) {
printk(KERN_WARNING "call_verify: RPC call rejected: %x\n", n); case RPC_AUTH_ERROR:
} else if (--len < 0) break;
case RPC_MISMATCH:
printk(KERN_WARNING "%s: RPC call version mismatch!\n", __FUNCTION__);
goto out_eio;
default:
printk(KERN_WARNING "%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n);
goto out_eio;
}
if (--len < 0)
goto out_overflow;
switch ((n = ntohl(*p++))) { switch ((n = ntohl(*p++))) {
case RPC_AUTH_REJECTEDCRED: case RPC_AUTH_REJECTEDCRED:
case RPC_AUTH_REJECTEDVERF: case RPC_AUTH_REJECTEDVERF:
...@@ -1017,20 +1025,18 @@ call_verify(struct rpc_task *task) ...@@ -1017,20 +1025,18 @@ call_verify(struct rpc_task *task)
default: default:
printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n); printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n);
error = -EIO; error = -EIO;
} else }
goto garbage;
dprintk("RPC: %4d call_verify: call rejected %d\n", dprintk("RPC: %4d call_verify: call rejected %d\n",
task->tk_pid, n); task->tk_pid, n);
rpc_exit(task, error); goto out_err;
return NULL;
} }
if (!(p = rpcauth_checkverf(task, p))) { if (!(p = rpcauth_checkverf(task, p))) {
printk(KERN_WARNING "call_verify: auth check failed\n"); printk(KERN_WARNING "call_verify: auth check failed\n");
goto garbage; /* bad verifier, retry */ goto out_retry; /* bad verifier, retry */
} }
len = p - (u32 *)iov->iov_base - 1; len = p - (u32 *)iov->iov_base - 1;
if (len < 0) if (len < 0)
goto garbage; goto out_overflow;
switch ((n = ntohl(*p++))) { switch ((n = ntohl(*p++))) {
case RPC_SUCCESS: case RPC_SUCCESS:
return p; return p;
...@@ -1053,23 +1059,28 @@ call_verify(struct rpc_task *task) ...@@ -1053,23 +1059,28 @@ call_verify(struct rpc_task *task)
task->tk_client->cl_server); task->tk_client->cl_server);
goto out_eio; goto out_eio;
case RPC_GARBAGE_ARGS: case RPC_GARBAGE_ARGS:
dprintk("RPC: %4d %s: server saw garbage\n", task->tk_pid, __FUNCTION__);
break; /* retry */ break; /* retry */
default: default:
printk(KERN_WARNING "call_verify: server accept status: %x\n", n); printk(KERN_WARNING "call_verify: server accept status: %x\n", n);
/* Also retry */ /* Also retry */
} }
garbage: out_retry:
dprintk("RPC: %4d call_verify: server saw garbage\n", task->tk_pid);
task->tk_client->cl_stats->rpcgarbage++; task->tk_client->cl_stats->rpcgarbage++;
if (task->tk_garb_retry) { if (task->tk_garb_retry) {
task->tk_garb_retry--; task->tk_garb_retry--;
dprintk(KERN_WARNING "RPC: garbage, retrying %4d\n", task->tk_pid); dprintk(KERN_WARNING "RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid);
task->tk_action = call_bind; task->tk_action = call_bind;
return NULL; return NULL;
} }
printk(KERN_WARNING "RPC: garbage, exit EIO\n"); printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__);
out_eio: out_eio:
rpc_exit(task, -EIO); error = -EIO;
out_err:
rpc_exit(task, error);
return NULL; return NULL;
out_overflow:
printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__);
goto out_retry;
} }
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#ifdef RPC_DEBUG #ifdef RPC_DEBUG
#define RPCDBG_FACILITY RPCDBG_SCHED #define RPCDBG_FACILITY RPCDBG_SCHED
#define RPC_TASK_MAGIC_ID 0xf00baa
static int rpc_task_id; static int rpc_task_id;
#endif #endif
...@@ -41,16 +42,9 @@ static mempool_t *rpc_buffer_mempool; ...@@ -41,16 +42,9 @@ static mempool_t *rpc_buffer_mempool;
static void __rpc_default_timer(struct rpc_task *task); static void __rpc_default_timer(struct rpc_task *task);
static void rpciod_killall(void); static void rpciod_killall(void);
static void rpc_free(struct rpc_task *task); static void rpc_free(struct rpc_task *task);
static void rpc_async_schedule(void *);
/*
* When an asynchronous RPC task is activated within a bottom half
* handler, or while executing another RPC task, it is put on
* schedq, and rpciod is woken up.
*/
static RPC_WAITQ(schedq, "schedq");
/* /*
* RPC tasks that create another task (e.g. for contacting the portmapper) * RPC tasks that create another task (e.g. for contacting the portmapper)
...@@ -71,18 +65,10 @@ static LIST_HEAD(all_tasks); ...@@ -71,18 +65,10 @@ static LIST_HEAD(all_tasks);
/* /*
* rpciod-related stuff * rpciod-related stuff
*/ */
static DECLARE_WAIT_QUEUE_HEAD(rpciod_idle);
static DECLARE_COMPLETION(rpciod_killer);
static DECLARE_MUTEX(rpciod_sema); static DECLARE_MUTEX(rpciod_sema);
static unsigned int rpciod_users; static unsigned int rpciod_users;
static pid_t rpciod_pid; static struct workqueue_struct *rpciod_workqueue;
static int rpc_inhibit;
/*
* Spinlock for wait queues. Access to the latter also has to be
* interrupt-safe in order to allow timers to wake up sleeping tasks.
*/
static spinlock_t rpc_queue_lock = SPIN_LOCK_UNLOCKED;
/* /*
* Spinlock for other critical sections of code. * Spinlock for other critical sections of code.
*/ */
...@@ -90,7 +76,7 @@ static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED; ...@@ -90,7 +76,7 @@ static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED;
/* /*
* Disable the timer for a given RPC task. Should be called with * Disable the timer for a given RPC task. Should be called with
* rpc_queue_lock and bh_disabled in order to avoid races within * queue->lock and bh_disabled in order to avoid races within
* rpc_run_timer(). * rpc_run_timer().
*/ */
static inline void static inline void
...@@ -108,19 +94,19 @@ __rpc_disable_timer(struct rpc_task *task) ...@@ -108,19 +94,19 @@ __rpc_disable_timer(struct rpc_task *task)
* without calling del_timer_sync(). The latter could cause a * without calling del_timer_sync(). The latter could cause a
* deadlock if called while we're holding spinlocks... * deadlock if called while we're holding spinlocks...
*/ */
static void static void rpc_run_timer(struct rpc_task *task)
rpc_run_timer(struct rpc_task *task)
{ {
void (*callback)(struct rpc_task *); void (*callback)(struct rpc_task *);
spin_lock_bh(&rpc_queue_lock);
callback = task->tk_timeout_fn; callback = task->tk_timeout_fn;
task->tk_timeout_fn = NULL; task->tk_timeout_fn = NULL;
spin_unlock_bh(&rpc_queue_lock); if (callback && RPC_IS_QUEUED(task)) {
if (callback) {
dprintk("RPC: %4d running timer\n", task->tk_pid); dprintk("RPC: %4d running timer\n", task->tk_pid);
callback(task); callback(task);
} }
smp_mb__before_clear_bit();
clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
smp_mb__after_clear_bit();
} }
/* /*
...@@ -139,29 +125,21 @@ __rpc_add_timer(struct rpc_task *task, rpc_action timer) ...@@ -139,29 +125,21 @@ __rpc_add_timer(struct rpc_task *task, rpc_action timer)
task->tk_timeout_fn = timer; task->tk_timeout_fn = timer;
else else
task->tk_timeout_fn = __rpc_default_timer; task->tk_timeout_fn = __rpc_default_timer;
set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
mod_timer(&task->tk_timer, jiffies + task->tk_timeout); mod_timer(&task->tk_timer, jiffies + task->tk_timeout);
} }
/*
* Set up a timer for an already sleeping task.
*/
void rpc_add_timer(struct rpc_task *task, rpc_action timer)
{
spin_lock_bh(&rpc_queue_lock);
if (!RPC_IS_RUNNING(task))
__rpc_add_timer(task, timer);
spin_unlock_bh(&rpc_queue_lock);
}
/* /*
* Delete any timer for the current task. Because we use del_timer_sync(), * Delete any timer for the current task. Because we use del_timer_sync(),
* this function should never be called while holding rpc_queue_lock. * this function should never be called while holding queue->lock.
*/ */
static inline void static inline void
rpc_delete_timer(struct rpc_task *task) rpc_delete_timer(struct rpc_task *task)
{ {
if (del_timer_sync(&task->tk_timer)) if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) {
del_singleshot_timer_sync(&task->tk_timer);
dprintk("RPC: %4d deleting timer\n", task->tk_pid); dprintk("RPC: %4d deleting timer\n", task->tk_pid);
}
} }
/* /*
...@@ -172,16 +150,17 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r ...@@ -172,16 +150,17 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r
struct list_head *q; struct list_head *q;
struct rpc_task *t; struct rpc_task *t;
INIT_LIST_HEAD(&task->u.tk_wait.links);
q = &queue->tasks[task->tk_priority]; q = &queue->tasks[task->tk_priority];
if (unlikely(task->tk_priority > queue->maxpriority)) if (unlikely(task->tk_priority > queue->maxpriority))
q = &queue->tasks[queue->maxpriority]; q = &queue->tasks[queue->maxpriority];
list_for_each_entry(t, q, tk_list) { list_for_each_entry(t, q, u.tk_wait.list) {
if (t->tk_cookie == task->tk_cookie) { if (t->tk_cookie == task->tk_cookie) {
list_add_tail(&task->tk_list, &t->tk_links); list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
return; return;
} }
} }
list_add_tail(&task->tk_list, q); list_add_tail(&task->u.tk_wait.list, q);
} }
/* /*
...@@ -192,37 +171,21 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r ...@@ -192,37 +171,21 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r
* improve overall performance. * improve overall performance.
* Everyone else gets appended to the queue to ensure proper FIFO behavior. * Everyone else gets appended to the queue to ensure proper FIFO behavior.
*/ */
static int __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
{ {
if (task->tk_rpcwait == queue) BUG_ON (RPC_IS_QUEUED(task));
return 0;
if (task->tk_rpcwait) {
printk(KERN_WARNING "RPC: doubly enqueued task!\n");
return -EWOULDBLOCK;
}
if (RPC_IS_PRIORITY(queue)) if (RPC_IS_PRIORITY(queue))
__rpc_add_wait_queue_priority(queue, task); __rpc_add_wait_queue_priority(queue, task);
else if (RPC_IS_SWAPPER(task)) else if (RPC_IS_SWAPPER(task))
list_add(&task->tk_list, &queue->tasks[0]); list_add(&task->u.tk_wait.list, &queue->tasks[0]);
else else
list_add_tail(&task->tk_list, &queue->tasks[0]); list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
task->tk_rpcwait = queue; task->u.tk_wait.rpc_waitq = queue;
rpc_set_queued(task);
dprintk("RPC: %4d added to queue %p \"%s\"\n", dprintk("RPC: %4d added to queue %p \"%s\"\n",
task->tk_pid, queue, rpc_qname(queue)); task->tk_pid, queue, rpc_qname(queue));
return 0;
}
int rpc_add_wait_queue(struct rpc_wait_queue *q, struct rpc_task *task)
{
int result;
spin_lock_bh(&rpc_queue_lock);
result = __rpc_add_wait_queue(q, task);
spin_unlock_bh(&rpc_queue_lock);
return result;
} }
/* /*
...@@ -232,12 +195,12 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task) ...@@ -232,12 +195,12 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
{ {
struct rpc_task *t; struct rpc_task *t;
if (!list_empty(&task->tk_links)) { if (!list_empty(&task->u.tk_wait.links)) {
t = list_entry(task->tk_links.next, struct rpc_task, tk_list); t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
list_move(&t->tk_list, &task->tk_list); list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
list_splice_init(&task->tk_links, &t->tk_links); list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
} }
list_del(&task->tk_list); list_del(&task->u.tk_wait.list);
} }
/* /*
...@@ -246,31 +209,17 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task) ...@@ -246,31 +209,17 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
*/ */
static void __rpc_remove_wait_queue(struct rpc_task *task) static void __rpc_remove_wait_queue(struct rpc_task *task)
{ {
struct rpc_wait_queue *queue = task->tk_rpcwait; struct rpc_wait_queue *queue;
queue = task->u.tk_wait.rpc_waitq;
if (!queue)
return;
if (RPC_IS_PRIORITY(queue)) if (RPC_IS_PRIORITY(queue))
__rpc_remove_wait_queue_priority(task); __rpc_remove_wait_queue_priority(task);
else else
list_del(&task->tk_list); list_del(&task->u.tk_wait.list);
task->tk_rpcwait = NULL;
dprintk("RPC: %4d removed from queue %p \"%s\"\n", dprintk("RPC: %4d removed from queue %p \"%s\"\n",
task->tk_pid, queue, rpc_qname(queue)); task->tk_pid, queue, rpc_qname(queue));
} }
void
rpc_remove_wait_queue(struct rpc_task *task)
{
if (!task->tk_rpcwait)
return;
spin_lock_bh(&rpc_queue_lock);
__rpc_remove_wait_queue(task);
spin_unlock_bh(&rpc_queue_lock);
}
static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
{ {
queue->priority = priority; queue->priority = priority;
...@@ -293,6 +242,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c ...@@ -293,6 +242,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
{ {
int i; int i;
spin_lock_init(&queue->lock);
for (i = 0; i < ARRAY_SIZE(queue->tasks); i++) for (i = 0; i < ARRAY_SIZE(queue->tasks); i++)
INIT_LIST_HEAD(&queue->tasks[i]); INIT_LIST_HEAD(&queue->tasks[i]);
queue->maxpriority = maxprio; queue->maxpriority = maxprio;
...@@ -319,34 +269,31 @@ EXPORT_SYMBOL(rpc_init_wait_queue); ...@@ -319,34 +269,31 @@ EXPORT_SYMBOL(rpc_init_wait_queue);
* Note: If the task is ASYNC, this must be called with * Note: If the task is ASYNC, this must be called with
* the spinlock held to protect the wait queue operation. * the spinlock held to protect the wait queue operation.
*/ */
static inline void static void rpc_make_runnable(struct rpc_task *task)
rpc_make_runnable(struct rpc_task *task)
{ {
if (task->tk_timeout_fn) { int do_ret;
printk(KERN_ERR "RPC: task w/ running timer in rpc_make_runnable!!\n");
BUG_ON(task->tk_timeout_fn);
do_ret = rpc_test_and_set_running(task);
rpc_clear_queued(task);
if (do_ret)
return; return;
}
rpc_set_running(task);
if (RPC_IS_ASYNC(task)) { if (RPC_IS_ASYNC(task)) {
if (RPC_IS_SLEEPING(task)) { int status;
int status;
status = __rpc_add_wait_queue(&schedq, task); INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task);
if (status < 0) { status = queue_work(task->tk_workqueue, &task->u.tk_work);
printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); if (status < 0) {
task->tk_status = status; printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
return; task->tk_status = status;
} return;
rpc_clear_sleeping(task);
wake_up(&rpciod_idle);
} }
} else { } else
rpc_clear_sleeping(task); wake_up(&task->u.tk_wait.waitq);
wake_up(&task->tk_wait);
}
} }
/* /*
* Place a newly initialized task on the schedq. * Place a newly initialized task on the workqueue.
*/ */
static inline void static inline void
rpc_schedule_run(struct rpc_task *task) rpc_schedule_run(struct rpc_task *task)
...@@ -355,33 +302,18 @@ rpc_schedule_run(struct rpc_task *task) ...@@ -355,33 +302,18 @@ rpc_schedule_run(struct rpc_task *task)
if (RPC_IS_ACTIVATED(task)) if (RPC_IS_ACTIVATED(task))
return; return;
task->tk_active = 1; task->tk_active = 1;
rpc_set_sleeping(task);
rpc_make_runnable(task); rpc_make_runnable(task);
} }
/*
* For other people who may need to wake the I/O daemon
* but should (for now) know nothing about its innards
*/
void rpciod_wake_up(void)
{
if(rpciod_pid==0)
printk(KERN_ERR "rpciod: wot no daemon?\n");
wake_up(&rpciod_idle);
}
/* /*
* Prepare for sleeping on a wait queue. * Prepare for sleeping on a wait queue.
* By always appending tasks to the list we ensure FIFO behavior. * By always appending tasks to the list we ensure FIFO behavior.
* NB: An RPC task will only receive interrupt-driven events as long * NB: An RPC task will only receive interrupt-driven events as long
* as it's on a wait queue. * as it's on a wait queue.
*/ */
static void static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
__rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_action action, rpc_action timer) rpc_action action, rpc_action timer)
{ {
int status;
dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid, dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid,
rpc_qname(q), jiffies); rpc_qname(q), jiffies);
...@@ -391,75 +323,66 @@ __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, ...@@ -391,75 +323,66 @@ __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
} }
/* Mark the task as being activated if so needed */ /* Mark the task as being activated if so needed */
if (!RPC_IS_ACTIVATED(task)) { if (!RPC_IS_ACTIVATED(task))
task->tk_active = 1; task->tk_active = 1;
rpc_set_sleeping(task);
}
status = __rpc_add_wait_queue(q, task); __rpc_add_wait_queue(q, task);
if (status) {
printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); BUG_ON(task->tk_callback != NULL);
task->tk_status = status; task->tk_callback = action;
} else { __rpc_add_timer(task, timer);
rpc_clear_running(task);
if (task->tk_callback) {
dprintk(KERN_ERR "RPC: %4d overwrites an active callback\n", task->tk_pid);
BUG();
}
task->tk_callback = action;
__rpc_add_timer(task, timer);
}
} }
void void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_action action, rpc_action timer) rpc_action action, rpc_action timer)
{ {
/* /*
* Protect the queue operations. * Protect the queue operations.
*/ */
spin_lock_bh(&rpc_queue_lock); spin_lock_bh(&q->lock);
__rpc_sleep_on(q, task, action, timer); __rpc_sleep_on(q, task, action, timer);
spin_unlock_bh(&rpc_queue_lock); spin_unlock_bh(&q->lock);
} }
/** /**
* __rpc_wake_up_task - wake up a single rpc_task * __rpc_do_wake_up_task - wake up a single rpc_task
* @task: task to be woken up * @task: task to be woken up
* *
* Caller must hold rpc_queue_lock * Caller must hold queue->lock, and have cleared the task queued flag.
*/ */
static void static void __rpc_do_wake_up_task(struct rpc_task *task)
__rpc_wake_up_task(struct rpc_task *task)
{ {
dprintk("RPC: %4d __rpc_wake_up_task (now %ld inh %d)\n", dprintk("RPC: %4d __rpc_wake_up_task (now %ld)\n", task->tk_pid, jiffies);
task->tk_pid, jiffies, rpc_inhibit);
#ifdef RPC_DEBUG #ifdef RPC_DEBUG
if (task->tk_magic != 0xf00baa) { BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
printk(KERN_ERR "RPC: attempt to wake up non-existing task!\n");
rpc_debug = ~0;
rpc_show_tasks();
return;
}
#endif #endif
/* Has the task been executed yet? If not, we cannot wake it up! */ /* Has the task been executed yet? If not, we cannot wake it up! */
if (!RPC_IS_ACTIVATED(task)) { if (!RPC_IS_ACTIVATED(task)) {
printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task); printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
return; return;
} }
if (RPC_IS_RUNNING(task))
return;
__rpc_disable_timer(task); __rpc_disable_timer(task);
if (task->tk_rpcwait != &schedq) __rpc_remove_wait_queue(task);
__rpc_remove_wait_queue(task);
rpc_make_runnable(task); rpc_make_runnable(task);
dprintk("RPC: __rpc_wake_up_task done\n"); dprintk("RPC: __rpc_wake_up_task done\n");
} }
/*
* Wake up the specified task
*/
static void __rpc_wake_up_task(struct rpc_task *task)
{
if (rpc_start_wakeup(task)) {
if (RPC_IS_QUEUED(task))
__rpc_do_wake_up_task(task);
rpc_finish_wakeup(task);
}
}
/* /*
* Default timeout handler if none specified by user * Default timeout handler if none specified by user
*/ */
...@@ -474,14 +397,18 @@ __rpc_default_timer(struct rpc_task *task) ...@@ -474,14 +397,18 @@ __rpc_default_timer(struct rpc_task *task)
/* /*
* Wake up the specified task * Wake up the specified task
*/ */
void void rpc_wake_up_task(struct rpc_task *task)
rpc_wake_up_task(struct rpc_task *task)
{ {
if (RPC_IS_RUNNING(task)) if (rpc_start_wakeup(task)) {
return; if (RPC_IS_QUEUED(task)) {
spin_lock_bh(&rpc_queue_lock); struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq;
__rpc_wake_up_task(task);
spin_unlock_bh(&rpc_queue_lock); spin_lock_bh(&queue->lock);
__rpc_do_wake_up_task(task);
spin_unlock_bh(&queue->lock);
}
rpc_finish_wakeup(task);
}
} }
/* /*
...@@ -497,11 +424,11 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu ...@@ -497,11 +424,11 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu
*/ */
q = &queue->tasks[queue->priority]; q = &queue->tasks[queue->priority];
if (!list_empty(q)) { if (!list_empty(q)) {
task = list_entry(q->next, struct rpc_task, tk_list); task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
if (queue->cookie == task->tk_cookie) { if (queue->cookie == task->tk_cookie) {
if (--queue->nr) if (--queue->nr)
goto out; goto out;
list_move_tail(&task->tk_list, q); list_move_tail(&task->u.tk_wait.list, q);
} }
/* /*
* Check if we need to switch queues. * Check if we need to switch queues.
...@@ -519,7 +446,7 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu ...@@ -519,7 +446,7 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu
else else
q = q - 1; q = q - 1;
if (!list_empty(q)) { if (!list_empty(q)) {
task = list_entry(q->next, struct rpc_task, tk_list); task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
goto new_queue; goto new_queue;
} }
} while (q != &queue->tasks[queue->priority]); } while (q != &queue->tasks[queue->priority]);
...@@ -544,14 +471,14 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) ...@@ -544,14 +471,14 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
struct rpc_task *task = NULL; struct rpc_task *task = NULL;
dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
spin_lock_bh(&rpc_queue_lock); spin_lock_bh(&queue->lock);
if (RPC_IS_PRIORITY(queue)) if (RPC_IS_PRIORITY(queue))
task = __rpc_wake_up_next_priority(queue); task = __rpc_wake_up_next_priority(queue);
else { else {
task_for_first(task, &queue->tasks[0]) task_for_first(task, &queue->tasks[0])
__rpc_wake_up_task(task); __rpc_wake_up_task(task);
} }
spin_unlock_bh(&rpc_queue_lock); spin_unlock_bh(&queue->lock);
return task; return task;
} }
...@@ -560,25 +487,25 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) ...@@ -560,25 +487,25 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
* rpc_wake_up - wake up all rpc_tasks * rpc_wake_up - wake up all rpc_tasks
* @queue: rpc_wait_queue on which the tasks are sleeping * @queue: rpc_wait_queue on which the tasks are sleeping
* *
* Grabs rpc_queue_lock * Grabs queue->lock
*/ */
void rpc_wake_up(struct rpc_wait_queue *queue) void rpc_wake_up(struct rpc_wait_queue *queue)
{ {
struct rpc_task *task; struct rpc_task *task;
struct list_head *head; struct list_head *head;
spin_lock_bh(&rpc_queue_lock); spin_lock_bh(&queue->lock);
head = &queue->tasks[queue->maxpriority]; head = &queue->tasks[queue->maxpriority];
for (;;) { for (;;) {
while (!list_empty(head)) { while (!list_empty(head)) {
task = list_entry(head->next, struct rpc_task, tk_list); task = list_entry(head->next, struct rpc_task, u.tk_wait.list);
__rpc_wake_up_task(task); __rpc_wake_up_task(task);
} }
if (head == &queue->tasks[0]) if (head == &queue->tasks[0])
break; break;
head--; head--;
} }
spin_unlock_bh(&rpc_queue_lock); spin_unlock_bh(&queue->lock);
} }
/** /**
...@@ -586,18 +513,18 @@ void rpc_wake_up(struct rpc_wait_queue *queue) ...@@ -586,18 +513,18 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
* @queue: rpc_wait_queue on which the tasks are sleeping * @queue: rpc_wait_queue on which the tasks are sleeping
* @status: status value to set * @status: status value to set
* *
* Grabs rpc_queue_lock * Grabs queue->lock
*/ */
void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
{ {
struct list_head *head; struct list_head *head;
struct rpc_task *task; struct rpc_task *task;
spin_lock_bh(&rpc_queue_lock); spin_lock_bh(&queue->lock);
head = &queue->tasks[queue->maxpriority]; head = &queue->tasks[queue->maxpriority];
for (;;) { for (;;) {
while (!list_empty(head)) { while (!list_empty(head)) {
task = list_entry(head->next, struct rpc_task, tk_list); task = list_entry(head->next, struct rpc_task, u.tk_wait.list);
task->tk_status = status; task->tk_status = status;
__rpc_wake_up_task(task); __rpc_wake_up_task(task);
} }
...@@ -605,7 +532,7 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) ...@@ -605,7 +532,7 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
break; break;
head--; head--;
} }
spin_unlock_bh(&rpc_queue_lock); spin_unlock_bh(&queue->lock);
} }
/* /*
...@@ -629,21 +556,22 @@ __rpc_atrun(struct rpc_task *task) ...@@ -629,21 +556,22 @@ __rpc_atrun(struct rpc_task *task)
/* /*
* This is the RPC `scheduler' (or rather, the finite state machine). * This is the RPC `scheduler' (or rather, the finite state machine).
*/ */
static int static int __rpc_execute(struct rpc_task *task)
__rpc_execute(struct rpc_task *task)
{ {
int status = 0; int status = 0;
dprintk("RPC: %4d rpc_execute flgs %x\n", dprintk("RPC: %4d rpc_execute flgs %x\n",
task->tk_pid, task->tk_flags); task->tk_pid, task->tk_flags);
if (!RPC_IS_RUNNING(task)) { BUG_ON(RPC_IS_QUEUED(task));
printk(KERN_WARNING "RPC: rpc_execute called for sleeping task!!\n");
return 0;
}
restarted: restarted:
while (1) { while (1) {
/*
* Garbage collection of pending timers...
*/
rpc_delete_timer(task);
/* /*
* Execute any pending callback. * Execute any pending callback.
*/ */
...@@ -660,7 +588,9 @@ __rpc_execute(struct rpc_task *task) ...@@ -660,7 +588,9 @@ __rpc_execute(struct rpc_task *task)
*/ */
save_callback=task->tk_callback; save_callback=task->tk_callback;
task->tk_callback=NULL; task->tk_callback=NULL;
lock_kernel();
save_callback(task); save_callback(task);
unlock_kernel();
} }
/* /*
...@@ -668,43 +598,35 @@ __rpc_execute(struct rpc_task *task) ...@@ -668,43 +598,35 @@ __rpc_execute(struct rpc_task *task)
* tk_action may be NULL when the task has been killed * tk_action may be NULL when the task has been killed
* by someone else. * by someone else.
*/ */
if (RPC_IS_RUNNING(task)) { if (!RPC_IS_QUEUED(task)) {
/*
* Garbage collection of pending timers...
*/
rpc_delete_timer(task);
if (!task->tk_action) if (!task->tk_action)
break; break;
lock_kernel();
task->tk_action(task); task->tk_action(task);
/* micro-optimization to avoid spinlock */ unlock_kernel();
if (RPC_IS_RUNNING(task))
continue;
} }
/* /*
* Check whether task is sleeping. * Lockless check for whether task is sleeping or not.
*/ */
spin_lock_bh(&rpc_queue_lock); if (!RPC_IS_QUEUED(task))
if (!RPC_IS_RUNNING(task)) { continue;
rpc_set_sleeping(task); rpc_clear_running(task);
if (RPC_IS_ASYNC(task)) { if (RPC_IS_ASYNC(task)) {
spin_unlock_bh(&rpc_queue_lock); /* Careful! we may have raced... */
if (RPC_IS_QUEUED(task))
return 0; return 0;
} if (rpc_test_and_set_running(task))
return 0;
continue;
} }
spin_unlock_bh(&rpc_queue_lock);
if (!RPC_IS_SLEEPING(task))
continue;
/* sync task: sleep here */ /* sync task: sleep here */
dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid); dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid);
if (current->pid == rpciod_pid)
printk(KERN_ERR "RPC: rpciod waiting on sync task!\n");
if (RPC_TASK_UNINTERRUPTIBLE(task)) { if (RPC_TASK_UNINTERRUPTIBLE(task)) {
__wait_event(task->tk_wait, !RPC_IS_SLEEPING(task)); __wait_event(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task));
} else { } else {
__wait_event_interruptible(task->tk_wait, !RPC_IS_SLEEPING(task), status); __wait_event_interruptible(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task), status);
/* /*
* When a sync task receives a signal, it exits with * When a sync task receives a signal, it exits with
* -ERESTARTSYS. In order to catch any callbacks that * -ERESTARTSYS. In order to catch any callbacks that
...@@ -718,11 +640,14 @@ __rpc_execute(struct rpc_task *task) ...@@ -718,11 +640,14 @@ __rpc_execute(struct rpc_task *task)
rpc_wake_up_task(task); rpc_wake_up_task(task);
} }
} }
rpc_set_running(task);
dprintk("RPC: %4d sync task resuming\n", task->tk_pid); dprintk("RPC: %4d sync task resuming\n", task->tk_pid);
} }
if (task->tk_exit) { if (task->tk_exit) {
lock_kernel();
task->tk_exit(task); task->tk_exit(task);
unlock_kernel();
/* If tk_action is non-null, the user wants us to restart */ /* If tk_action is non-null, the user wants us to restart */
if (task->tk_action) { if (task->tk_action) {
if (!RPC_ASSASSINATED(task)) { if (!RPC_ASSASSINATED(task)) {
...@@ -741,7 +666,6 @@ __rpc_execute(struct rpc_task *task) ...@@ -741,7 +666,6 @@ __rpc_execute(struct rpc_task *task)
/* Release all resources associated with the task */ /* Release all resources associated with the task */
rpc_release_task(task); rpc_release_task(task);
return status; return status;
} }
...@@ -757,57 +681,16 @@ __rpc_execute(struct rpc_task *task) ...@@ -757,57 +681,16 @@ __rpc_execute(struct rpc_task *task)
int int
rpc_execute(struct rpc_task *task) rpc_execute(struct rpc_task *task)
{ {
int status = -EIO; BUG_ON(task->tk_active);
if (rpc_inhibit) {
printk(KERN_INFO "RPC: execution inhibited!\n");
goto out_release;
}
status = -EWOULDBLOCK;
if (task->tk_active) {
printk(KERN_ERR "RPC: active task was run twice!\n");
goto out_err;
}
task->tk_active = 1; task->tk_active = 1;
rpc_set_running(task); rpc_set_running(task);
return __rpc_execute(task); return __rpc_execute(task);
out_release:
rpc_release_task(task);
out_err:
return status;
} }
/* static void rpc_async_schedule(void *arg)
* This is our own little scheduler for async RPC tasks.
*/
static void
__rpc_schedule(void)
{ {
struct rpc_task *task; __rpc_execute((struct rpc_task *)arg);
int count = 0;
dprintk("RPC: rpc_schedule enter\n");
while (1) {
task_for_first(task, &schedq.tasks[0]) {
__rpc_remove_wait_queue(task);
spin_unlock_bh(&rpc_queue_lock);
__rpc_execute(task);
spin_lock_bh(&rpc_queue_lock);
} else {
break;
}
if (++count >= 200 || need_resched()) {
count = 0;
spin_unlock_bh(&rpc_queue_lock);
schedule();
spin_lock_bh(&rpc_queue_lock);
}
}
dprintk("RPC: rpc_schedule leave\n");
} }
/* /*
...@@ -865,7 +748,6 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call ...@@ -865,7 +748,6 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call
task->tk_client = clnt; task->tk_client = clnt;
task->tk_flags = flags; task->tk_flags = flags;
task->tk_exit = callback; task->tk_exit = callback;
init_waitqueue_head(&task->tk_wait);
if (current->uid != current->fsuid || current->gid != current->fsgid) if (current->uid != current->fsuid || current->gid != current->fsgid)
task->tk_flags |= RPC_TASK_SETUID; task->tk_flags |= RPC_TASK_SETUID;
...@@ -876,12 +758,11 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call ...@@ -876,12 +758,11 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call
task->tk_priority = RPC_PRIORITY_NORMAL; task->tk_priority = RPC_PRIORITY_NORMAL;
task->tk_cookie = (unsigned long)current; task->tk_cookie = (unsigned long)current;
INIT_LIST_HEAD(&task->tk_links);
/* Add to global list of all tasks */ /* Initialize workqueue for async tasks */
spin_lock(&rpc_sched_lock); task->tk_workqueue = rpciod_workqueue;
list_add(&task->tk_task, &all_tasks); if (!RPC_IS_ASYNC(task))
spin_unlock(&rpc_sched_lock); init_waitqueue_head(&task->u.tk_wait.waitq);
if (clnt) { if (clnt) {
atomic_inc(&clnt->cl_users); atomic_inc(&clnt->cl_users);
...@@ -892,9 +773,14 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call ...@@ -892,9 +773,14 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call
} }
#ifdef RPC_DEBUG #ifdef RPC_DEBUG
task->tk_magic = 0xf00baa; task->tk_magic = RPC_TASK_MAGIC_ID;
task->tk_pid = rpc_task_id++; task->tk_pid = rpc_task_id++;
#endif #endif
/* Add to global list of all tasks */
spin_lock(&rpc_sched_lock);
list_add_tail(&task->tk_task, &all_tasks);
spin_unlock(&rpc_sched_lock);
dprintk("RPC: %4d new task procpid %d\n", task->tk_pid, dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
current->pid); current->pid);
} }
...@@ -947,18 +833,12 @@ rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags) ...@@ -947,18 +833,12 @@ rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
goto out; goto out;
} }
void void rpc_release_task(struct rpc_task *task)
rpc_release_task(struct rpc_task *task)
{ {
dprintk("RPC: %4d release task\n", task->tk_pid); dprintk("RPC: %4d release task\n", task->tk_pid);
#ifdef RPC_DEBUG #ifdef RPC_DEBUG
if (task->tk_magic != 0xf00baa) { BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
printk(KERN_ERR "RPC: attempt to release a non-existing task!\n");
rpc_debug = ~0;
rpc_show_tasks();
return;
}
#endif #endif
/* Remove from global task list */ /* Remove from global task list */
...@@ -966,19 +846,9 @@ rpc_release_task(struct rpc_task *task) ...@@ -966,19 +846,9 @@ rpc_release_task(struct rpc_task *task)
list_del(&task->tk_task); list_del(&task->tk_task);
spin_unlock(&rpc_sched_lock); spin_unlock(&rpc_sched_lock);
/* Protect the execution below. */ BUG_ON (RPC_IS_QUEUED(task));
spin_lock_bh(&rpc_queue_lock);
/* Disable timer to prevent zombie wakeup */
__rpc_disable_timer(task);
/* Remove from any wait queue we're still on */
__rpc_remove_wait_queue(task);
task->tk_active = 0; task->tk_active = 0;
spin_unlock_bh(&rpc_queue_lock);
/* Synchronously delete any running timer */ /* Synchronously delete any running timer */
rpc_delete_timer(task); rpc_delete_timer(task);
...@@ -1008,10 +878,9 @@ rpc_release_task(struct rpc_task *task) ...@@ -1008,10 +878,9 @@ rpc_release_task(struct rpc_task *task)
* queue 'childq'. If so returns a pointer to the parent. * queue 'childq'. If so returns a pointer to the parent.
* Upon failure returns NULL. * Upon failure returns NULL.
* *
* Caller must hold rpc_queue_lock * Caller must hold childq.lock
*/ */
static inline struct rpc_task * static inline struct rpc_task *rpc_find_parent(struct rpc_task *child)
rpc_find_parent(struct rpc_task *child)
{ {
struct rpc_task *task, *parent; struct rpc_task *task, *parent;
struct list_head *le; struct list_head *le;
...@@ -1024,17 +893,16 @@ rpc_find_parent(struct rpc_task *child) ...@@ -1024,17 +893,16 @@ rpc_find_parent(struct rpc_task *child)
return NULL; return NULL;
} }
static void static void rpc_child_exit(struct rpc_task *child)
rpc_child_exit(struct rpc_task *child)
{ {
struct rpc_task *parent; struct rpc_task *parent;
spin_lock_bh(&rpc_queue_lock); spin_lock_bh(&childq.lock);
if ((parent = rpc_find_parent(child)) != NULL) { if ((parent = rpc_find_parent(child)) != NULL) {
parent->tk_status = child->tk_status; parent->tk_status = child->tk_status;
__rpc_wake_up_task(parent); __rpc_wake_up_task(parent);
} }
spin_unlock_bh(&rpc_queue_lock); spin_unlock_bh(&childq.lock);
} }
/* /*
...@@ -1057,22 +925,20 @@ rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent) ...@@ -1057,22 +925,20 @@ rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
return NULL; return NULL;
} }
void void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
{ {
spin_lock_bh(&rpc_queue_lock); spin_lock_bh(&childq.lock);
/* N.B. Is it possible for the child to have already finished? */ /* N.B. Is it possible for the child to have already finished? */
__rpc_sleep_on(&childq, task, func, NULL); __rpc_sleep_on(&childq, task, func, NULL);
rpc_schedule_run(child); rpc_schedule_run(child);
spin_unlock_bh(&rpc_queue_lock); spin_unlock_bh(&childq.lock);
} }
/* /*
* Kill all tasks for the given client. * Kill all tasks for the given client.
* XXX: kill their descendants as well? * XXX: kill their descendants as well?
*/ */
void void rpc_killall_tasks(struct rpc_clnt *clnt)
rpc_killall_tasks(struct rpc_clnt *clnt)
{ {
struct rpc_task *rovr; struct rpc_task *rovr;
struct list_head *le; struct list_head *le;
...@@ -1083,104 +949,28 @@ rpc_killall_tasks(struct rpc_clnt *clnt) ...@@ -1083,104 +949,28 @@ rpc_killall_tasks(struct rpc_clnt *clnt)
* Spin lock all_tasks to prevent changes... * Spin lock all_tasks to prevent changes...
*/ */
spin_lock(&rpc_sched_lock); spin_lock(&rpc_sched_lock);
alltask_for_each(rovr, le, &all_tasks) alltask_for_each(rovr, le, &all_tasks) {
if (! RPC_IS_ACTIVATED(rovr))
continue;
if (!clnt || rovr->tk_client == clnt) { if (!clnt || rovr->tk_client == clnt) {
rovr->tk_flags |= RPC_TASK_KILLED; rovr->tk_flags |= RPC_TASK_KILLED;
rpc_exit(rovr, -EIO); rpc_exit(rovr, -EIO);
rpc_wake_up_task(rovr); rpc_wake_up_task(rovr);
} }
}
spin_unlock(&rpc_sched_lock); spin_unlock(&rpc_sched_lock);
} }
static DECLARE_MUTEX_LOCKED(rpciod_running); static DECLARE_MUTEX_LOCKED(rpciod_running);
static inline int static void rpciod_killall(void)
rpciod_task_pending(void)
{
return !list_empty(&schedq.tasks[0]);
}
/*
* This is the rpciod kernel thread
*/
static int
rpciod(void *ptr)
{
int rounds = 0;
lock_kernel();
/*
* Let our maker know we're running ...
*/
rpciod_pid = current->pid;
up(&rpciod_running);
daemonize("rpciod");
allow_signal(SIGKILL);
dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid);
spin_lock_bh(&rpc_queue_lock);
while (rpciod_users) {
DEFINE_WAIT(wait);
if (signalled()) {
spin_unlock_bh(&rpc_queue_lock);
rpciod_killall();
flush_signals(current);
spin_lock_bh(&rpc_queue_lock);
}
__rpc_schedule();
if (current->flags & PF_FREEZE) {
spin_unlock_bh(&rpc_queue_lock);
refrigerator(PF_FREEZE);
spin_lock_bh(&rpc_queue_lock);
}
if (++rounds >= 64) { /* safeguard */
spin_unlock_bh(&rpc_queue_lock);
schedule();
rounds = 0;
spin_lock_bh(&rpc_queue_lock);
}
dprintk("RPC: rpciod back to sleep\n");
prepare_to_wait(&rpciod_idle, &wait, TASK_INTERRUPTIBLE);
if (!rpciod_task_pending() && !signalled()) {
spin_unlock_bh(&rpc_queue_lock);
schedule();
rounds = 0;
spin_lock_bh(&rpc_queue_lock);
}
finish_wait(&rpciod_idle, &wait);
dprintk("RPC: switch to rpciod\n");
}
spin_unlock_bh(&rpc_queue_lock);
dprintk("RPC: rpciod shutdown commences\n");
if (!list_empty(&all_tasks)) {
printk(KERN_ERR "rpciod: active tasks at shutdown?!\n");
rpciod_killall();
}
dprintk("RPC: rpciod exiting\n");
unlock_kernel();
rpciod_pid = 0;
complete_and_exit(&rpciod_killer, 0);
return 0;
}
static void
rpciod_killall(void)
{ {
unsigned long flags; unsigned long flags;
while (!list_empty(&all_tasks)) { while (!list_empty(&all_tasks)) {
clear_thread_flag(TIF_SIGPENDING); clear_thread_flag(TIF_SIGPENDING);
rpc_killall_tasks(NULL); rpc_killall_tasks(NULL);
spin_lock_bh(&rpc_queue_lock); flush_workqueue(rpciod_workqueue);
__rpc_schedule();
spin_unlock_bh(&rpc_queue_lock);
if (!list_empty(&all_tasks)) { if (!list_empty(&all_tasks)) {
dprintk("rpciod_killall: waiting for tasks to exit\n"); dprintk("rpciod_killall: waiting for tasks to exit\n");
yield(); yield();
...@@ -1198,28 +988,30 @@ rpciod_killall(void) ...@@ -1198,28 +988,30 @@ rpciod_killall(void)
int int
rpciod_up(void) rpciod_up(void)
{ {
struct workqueue_struct *wq;
int error = 0; int error = 0;
down(&rpciod_sema); down(&rpciod_sema);
dprintk("rpciod_up: pid %d, users %d\n", rpciod_pid, rpciod_users); dprintk("rpciod_up: users %d\n", rpciod_users);
rpciod_users++; rpciod_users++;
if (rpciod_pid) if (rpciod_workqueue)
goto out; goto out;
/* /*
* If there's no pid, we should be the first user. * If there's no pid, we should be the first user.
*/ */
if (rpciod_users > 1) if (rpciod_users > 1)
printk(KERN_WARNING "rpciod_up: no pid, %d users??\n", rpciod_users); printk(KERN_WARNING "rpciod_up: no workqueue, %d users??\n", rpciod_users);
/* /*
* Create the rpciod thread and wait for it to start. * Create the rpciod thread and wait for it to start.
*/ */
error = kernel_thread(rpciod, NULL, 0); error = -ENOMEM;
if (error < 0) { wq = create_workqueue("rpciod");
printk(KERN_WARNING "rpciod_up: create thread failed, error=%d\n", error); if (wq == NULL) {
printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error);
rpciod_users--; rpciod_users--;
goto out; goto out;
} }
down(&rpciod_running); rpciod_workqueue = wq;
error = 0; error = 0;
out: out:
up(&rpciod_sema); up(&rpciod_sema);
...@@ -1230,20 +1022,21 @@ void ...@@ -1230,20 +1022,21 @@ void
rpciod_down(void) rpciod_down(void)
{ {
down(&rpciod_sema); down(&rpciod_sema);
dprintk("rpciod_down pid %d sema %d\n", rpciod_pid, rpciod_users); dprintk("rpciod_down sema %d\n", rpciod_users);
if (rpciod_users) { if (rpciod_users) {
if (--rpciod_users) if (--rpciod_users)
goto out; goto out;
} else } else
printk(KERN_WARNING "rpciod_down: pid=%d, no users??\n", rpciod_pid); printk(KERN_WARNING "rpciod_down: no users??\n");
if (!rpciod_pid) { if (!rpciod_workqueue) {
dprintk("rpciod_down: Nothing to do!\n"); dprintk("rpciod_down: Nothing to do!\n");
goto out; goto out;
} }
rpciod_killall();
kill_proc(rpciod_pid, SIGKILL, 1); destroy_workqueue(rpciod_workqueue);
wait_for_completion(&rpciod_killer); rpciod_workqueue = NULL;
out: out:
up(&rpciod_sema); up(&rpciod_sema);
} }
...@@ -1261,7 +1054,12 @@ void rpc_show_tasks(void) ...@@ -1261,7 +1054,12 @@ void rpc_show_tasks(void)
} }
printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
"-rpcwait -action- --exit--\n"); "-rpcwait -action- --exit--\n");
alltask_for_each(t, le, &all_tasks) alltask_for_each(t, le, &all_tasks) {
const char *rpc_waitq = "none";
if (RPC_IS_QUEUED(t))
rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n", printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n",
t->tk_pid, t->tk_pid,
(t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
...@@ -1269,8 +1067,9 @@ void rpc_show_tasks(void) ...@@ -1269,8 +1067,9 @@ void rpc_show_tasks(void)
t->tk_client, t->tk_client,
(t->tk_client ? t->tk_client->cl_prog : 0), (t->tk_client ? t->tk_client->cl_prog : 0),
t->tk_rqstp, t->tk_timeout, t->tk_rqstp, t->tk_timeout,
rpc_qname(t->tk_rpcwait), rpc_waitq,
t->tk_action, t->tk_exit); t->tk_action, t->tk_exit);
}
spin_unlock(&rpc_sched_lock); spin_unlock(&rpc_sched_lock);
} }
#endif #endif
......
...@@ -371,6 +371,7 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, ...@@ -371,6 +371,7 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
do { do {
/* Are any pointers crossing a page boundary? */ /* Are any pointers crossing a page boundary? */
if (pgto_base == 0) { if (pgto_base == 0) {
flush_dcache_page(*pgto);
pgto_base = PAGE_CACHE_SIZE; pgto_base = PAGE_CACHE_SIZE;
pgto--; pgto--;
} }
...@@ -394,6 +395,7 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, ...@@ -394,6 +395,7 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
kunmap_atomic(vto, KM_USER0); kunmap_atomic(vto, KM_USER0);
} while ((len -= copy) != 0); } while ((len -= copy) != 0);
flush_dcache_page(*pgto);
} }
/* /*
...@@ -427,12 +429,14 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len) ...@@ -427,12 +429,14 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len)
pgbase += copy; pgbase += copy;
if (pgbase == PAGE_CACHE_SIZE) { if (pgbase == PAGE_CACHE_SIZE) {
flush_dcache_page(*pgto);
pgbase = 0; pgbase = 0;
pgto++; pgto++;
} }
p += copy; p += copy;
} while ((len -= copy) != 0); } while ((len -= copy) != 0);
flush_dcache_page(*pgto);
} }
/* /*
......
...@@ -893,7 +893,8 @@ tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc) ...@@ -893,7 +893,8 @@ tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc)
xprt->tcp_flags &= ~XPRT_COPY_XID; xprt->tcp_flags &= ~XPRT_COPY_XID;
xprt->tcp_flags |= XPRT_COPY_DATA; xprt->tcp_flags |= XPRT_COPY_DATA;
xprt->tcp_copied = 4; xprt->tcp_copied = 4;
dprintk("RPC: reading reply for XID %08x\n", xprt->tcp_xid); dprintk("RPC: reading reply for XID %08x\n",
ntohl(xprt->tcp_xid));
tcp_check_recm(xprt); tcp_check_recm(xprt);
} }
...@@ -913,7 +914,7 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc) ...@@ -913,7 +914,7 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
if (!req) { if (!req) {
xprt->tcp_flags &= ~XPRT_COPY_DATA; xprt->tcp_flags &= ~XPRT_COPY_DATA;
dprintk("RPC: XID %08x request not found!\n", dprintk("RPC: XID %08x request not found!\n",
xprt->tcp_xid); ntohl(xprt->tcp_xid));
spin_unlock(&xprt->sock_lock); spin_unlock(&xprt->sock_lock);
return; return;
} }
...@@ -1103,7 +1104,7 @@ xprt_write_space(struct sock *sk) ...@@ -1103,7 +1104,7 @@ xprt_write_space(struct sock *sk)
goto out; goto out;
spin_lock_bh(&xprt->sock_lock); spin_lock_bh(&xprt->sock_lock);
if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending) if (xprt->snd_task)
rpc_wake_up_task(xprt->snd_task); rpc_wake_up_task(xprt->snd_task);
spin_unlock_bh(&xprt->sock_lock); spin_unlock_bh(&xprt->sock_lock);
out: out:
...@@ -1362,7 +1363,7 @@ xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) ...@@ -1362,7 +1363,7 @@ xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
req->rq_xprt = xprt; req->rq_xprt = xprt;
req->rq_xid = xprt_alloc_xid(xprt); req->rq_xid = xprt_alloc_xid(xprt);
dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
req, req->rq_xid); req, ntohl(req->rq_xid));
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment