Commit ca486f40 authored by Trond Myklebust

Merge bk://nfsclient.bkbits.net/linux-2.6

into fys.uio.no:/home/linux/bitkeeper/nfsclient-2.6
parents e8323593 9e84df77
......@@ -783,6 +783,54 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
security_d_instantiate(entry, inode);
}
/**
* d_instantiate_unique - instantiate a non-aliased dentry
* @entry: dentry to instantiate
* @inode: inode to attach to this dentry
*
* Fill in inode information in the entry. On success, it returns NULL.
* If an unhashed alias of "entry" already exists, then we return the
* aliased dentry instead.
*
* Note that in order to avoid conflicts with rename() etc, the caller
* had better be holding the parent directory semaphore.
*/
struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
struct dentry *alias;
int len = entry->d_name.len;
const char *name = entry->d_name.name;
unsigned int hash = entry->d_name.hash;
BUG_ON(!list_empty(&entry->d_alias));
spin_lock(&dcache_lock);
if (!inode)
goto do_negative;
list_for_each_entry(alias, &inode->i_dentry, d_alias) {
struct qstr *qstr = &alias->d_name;
if (qstr->hash != hash)
continue;
if (alias->d_parent != entry->d_parent)
continue;
if (qstr->len != len)
continue;
if (memcmp(qstr->name, name, len))
continue;
dget_locked(alias);
spin_unlock(&dcache_lock);
BUG_ON(!d_unhashed(alias));
return alias;
}
list_add(&entry->d_alias, &inode->i_dentry);
do_negative:
entry->d_inode = inode;
spin_unlock(&dcache_lock);
security_d_instantiate(entry, inode);
return NULL;
}
EXPORT_SYMBOL(d_instantiate_unique);
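To illustrate the intended calling convention, here is a minimal sketch of a filesystem lookup method built on the d_add_unique() wrapper added to dcache.h below. This is not part of the patch; examplefs_lookup() and examplefs_iget() are hypothetical names, the latter standing in for any helper that returns a referenced inode (or NULL, which produces a negative dentry):

	/* Sketch only, not from the patch. */
	static struct dentry *examplefs_lookup(struct inode *dir,
			struct dentry *dentry, struct nameidata *nd)
	{
		struct inode *inode = examplefs_iget(dir, &dentry->d_name);
		struct dentry *alias;

		/* d_add_unique() hashes @dentry and returns NULL, or
		 * rehashes and returns an existing unhashed alias. */
		alias = d_add_unique(dentry, inode);
		if (alias != NULL)
			return alias;	/* VFS continues with the alias */
		return NULL;		/* @dentry itself was instantiated */
	}

Returning the alias from ->lookup() is exactly what the reworked nfs_lookup() below does, so only one hashed dentry ever refers to the inode for a given parent/name pair.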
/**
* d_alloc_root - allocate root dentry
* @root_inode: inode to allocate the root for
......
......@@ -1563,9 +1563,6 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)
error = filp->f_op->lock(filp, F_GETLK, &file_lock);
if (error < 0)
goto out;
else if (error == LOCK_USE_CLNT)
/* Bypass for NFS with no locking - 2.0.36 compat */
fl = posix_test_lock(filp, &file_lock);
else
fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
} else {
......@@ -1708,9 +1705,6 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
error = filp->f_op->lock(filp, F_GETLK, &file_lock);
if (error < 0)
goto out;
else if (error == LOCK_USE_CLNT)
/* Bypass for NFS with no locking - 2.0.36 compat */
fl = posix_test_lock(filp, &file_lock);
else
fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
} else {
......
......@@ -40,8 +40,6 @@
static int nfs_opendir(struct inode *, struct file *);
static int nfs_readdir(struct file *, void *, filldir_t);
static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
static int nfs_cached_lookup(struct inode *, struct dentry *,
struct nfs_fh *, struct nfs_fattr *);
static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
static int nfs_mkdir(struct inode *, struct dentry *, int);
static int nfs_rmdir(struct inode *, struct dentry *);
......@@ -294,24 +292,13 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
return res;
}
static unsigned int nfs_type2dtype[] = {
DT_UNKNOWN,
DT_REG,
DT_DIR,
DT_BLK,
DT_CHR,
DT_LNK,
DT_SOCK,
DT_UNKNOWN,
DT_FIFO
};
static inline
unsigned int nfs_type_to_d_type(enum nfs_ftype type)
static inline unsigned int dt_type(struct inode *inode)
{
return nfs_type2dtype[type];
return (inode->i_mode >> 12) & 15;
}
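The replacement works because the DT_* directory-entry constants are by construction the file-type bits of i_mode shifted down into a nibble. A worked example (not in the patch), using the octal S_IF* mode constants:

	/* S_IFREG == 0100000, so (0100000 >> 12) & 15 == 8 == DT_REG;
	 * S_IFDIR == 0040000, so (0040000 >> 12) & 15 == 4 == DT_DIR.
	 * This is what makes the nfs_type2dtype[] table above redundant. */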
static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc);
/*
* Once we've found the start of the dirent within a page: fill 'er up...
*/
......@@ -321,6 +308,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
{
struct file *file = desc->file;
struct nfs_entry *entry = desc->entry;
struct dentry *dentry = NULL;
unsigned long fileid;
int loop_count = 0,
res;
......@@ -333,9 +321,16 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
* retrieving the current dirent on the server */
fileid = nfs_fileid_to_ino_t(entry->ino);
/* Get a dentry if we have one */
if (dentry != NULL)
dput(dentry);
dentry = nfs_readdir_lookup(desc);
/* Use readdirplus info */
if (desc->plus && (entry->fattr->valid & NFS_ATTR_FATTR))
d_type = nfs_type_to_d_type(entry->fattr->type);
if (dentry != NULL && dentry->d_inode != NULL) {
d_type = dt_type(dentry->d_inode);
fileid = dentry->d_inode->i_ino;
}
res = filldir(dirent, entry->name, entry->len,
entry->prev_cookie, fileid, d_type);
......@@ -352,7 +347,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
}
}
dir_page_release(desc);
if (dentry != NULL)
dput(dentry);
dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res);
return res;
}
......@@ -615,24 +611,10 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
goto out_valid;
}
/*
* Note: we're not holding inode->i_sem and so may be racing with
* operations that change the directory. We therefore save the
* change attribute *before* we do the RPC call.
*/
verifier = nfs_save_change_attribute(dir);
error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr);
if (!error) {
if (nfs_compare_fh(NFS_FH(inode), &fhandle))
goto out_bad;
if (nfs_lookup_verify_inode(inode, isopen))
goto out_zap_parent;
goto out_valid_renew;
}
if (NFS_STALE(inode))
goto out_bad;
verifier = nfs_save_change_attribute(dir);
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
if (error)
goto out_bad;
......@@ -641,7 +623,6 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
if ((error = nfs_refresh_inode(inode, &fattr)) != 0)
goto out_bad;
out_valid_renew:
nfs_renew_times(dentry);
nfs_set_verifier(dentry, verifier);
out_valid:
......@@ -723,6 +704,7 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
{
struct dentry *res;
struct inode *inode = NULL;
int error;
struct nfs_fh fhandle;
......@@ -731,11 +713,11 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
dfprintk(VFS, "NFS: lookup(%s/%s)\n",
dentry->d_parent->d_name.name, dentry->d_name.name);
error = -ENAMETOOLONG;
res = ERR_PTR(-ENAMETOOLONG);
if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
goto out;
error = -ENOMEM;
res = ERR_PTR(-ENOMEM);
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
lock_kernel();
......@@ -746,29 +728,27 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
if (nfs_is_exclusive_create(dir, nd))
goto no_entry;
error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr);
if (error != 0) {
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name,
&fhandle, &fattr);
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr);
if (error == -ENOENT)
goto no_entry;
if (error != 0)
if (error < 0) {
res = ERR_PTR(error);
goto out_unlock;
}
error = -EACCES;
res = ERR_PTR(-EACCES);
inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
if (!inode)
goto out_unlock;
no_entry:
error = 0;
d_add(dentry, inode);
res = d_add_unique(dentry, inode);
if (res != NULL)
dentry = res;
nfs_renew_times(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
out_unlock:
unlock_kernel();
out:
BUG_ON(error > 0);
return ERR_PTR(error);
return res;
}
#ifdef CONFIG_NFS_V4
......@@ -798,15 +778,15 @@ static int is_atomic_open(struct inode *dir, struct nameidata *nd)
static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
struct dentry *res = NULL;
struct inode *inode = NULL;
int error = 0;
/* Check that we are indeed trying to open this file */
if (!is_atomic_open(dir, nd))
goto no_open;
if (dentry->d_name.len > NFS_SERVER(dir)->namelen) {
error = -ENAMETOOLONG;
res = ERR_PTR(-ENAMETOOLONG);
goto out;
}
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
......@@ -828,7 +808,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
inode = nfs4_atomic_open(dir, dentry, nd);
unlock_kernel();
if (IS_ERR(inode)) {
error = PTR_ERR(inode);
int error = PTR_ERR(inode);
switch (error) {
/* Make a negative dentry */
case -ENOENT:
......@@ -841,16 +821,18 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
/* case -EISDIR: */
/* case -EINVAL: */
default:
res = ERR_PTR(error);
goto out;
}
}
no_entry:
d_add(dentry, inode);
res = d_add_unique(dentry, inode);
if (res != NULL)
dentry = res;
nfs_renew_times(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
out:
BUG_ON(error > 0);
return ERR_PTR(error);
return res;
no_open:
return nfs_lookup(dir, dentry, nd);
}
......@@ -906,83 +888,51 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
}
#endif /* CONFIG_NFSV4 */
static inline
int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry)
static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
{
struct dentry *parent = desc->file->f_dentry;
struct inode *dir = parent->d_inode;
struct nfs_entry *entry = desc->entry;
int status;
struct dentry *dentry, *alias;
struct qstr name = {
.name = entry->name,
.len = entry->len,
};
struct inode *inode;
while((status = dir_decode(desc)) == 0) {
if (entry->len != dentry->d_name.len)
continue;
if (memcmp(entry->name, dentry->d_name.name, entry->len))
continue;
if (!(entry->fattr->valid & NFS_ATTR_FATTR))
continue;
switch (name.len) {
case 2:
if (name.name[0] == '.' && name.name[1] == '.')
return dget_parent(parent);
break;
case 1:
if (name.name[0] == '.')
return dget(parent);
}
return status;
}
/*
* Use the cached Readdirplus results in order to avoid a LOOKUP call
* whenever we believe that the parent directory has not changed.
*
* We assume that any file creation/rename changes the directory mtime.
* As this results in a page cache invalidation whenever it occurs,
* we don't require any other tests for cache coherency.
*/
static
int nfs_cached_lookup(struct inode *dir, struct dentry *dentry,
struct nfs_fh *fh, struct nfs_fattr *fattr)
{
nfs_readdir_descriptor_t desc;
struct nfs_server *server;
struct nfs_entry entry;
struct page *page;
unsigned long timestamp;
int res;
if (!NFS_USE_READDIRPLUS(dir))
return -ENOENT;
server = NFS_SERVER(dir);
/* Don't use readdirplus unless the cache is stable */
if ((server->flags & NFS_MOUNT_NOAC) != 0
|| nfs_caches_unstable(dir)
|| nfs_attribute_timeout(dir))
return -ENOENT;
if ((NFS_FLAGS(dir) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) != 0)
return -ENOENT;
timestamp = NFS_I(dir)->readdir_timestamp;
entry.fh = fh;
entry.fattr = fattr;
desc.decode = NFS_PROTO(dir)->decode_dirent;
desc.entry = &entry;
desc.page_index = 0;
desc.plus = 1;
for(;(page = find_get_page(dir->i_mapping, desc.page_index)); desc.page_index++) {
res = -EIO;
if (PageUptodate(page)) {
void * kaddr = kmap_atomic(page, KM_USER0);
desc.ptr = kaddr;
res = find_dirent_name(&desc, page, dentry);
kunmap_atomic(kaddr, KM_USER0);
name.hash = full_name_hash(name.name, name.len);
dentry = d_lookup(parent, &name);
if (dentry != NULL)
return dentry;
if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
return NULL;
/* Note: caller is already holding the dir->i_sem! */
dentry = d_alloc(parent, &name);
if (dentry == NULL)
return NULL;
dentry->d_op = NFS_PROTO(dir)->dentry_ops;
inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
if (!inode) {
dput(dentry);
return NULL;
}
page_cache_release(page);
if (res == 0)
goto out_found;
if (res != -EAGAIN)
break;
alias = d_add_unique(dentry, inode);
if (alias != NULL) {
dput(dentry);
dentry = alias;
}
return -ENOENT;
out_found:
fattr->timestamp = timestamp;
return 0;
nfs_renew_times(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
return dentry;
}
/*
......@@ -1045,15 +995,9 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
if (nd && (nd->flags & LOOKUP_CREATE))
open_flags = nd->intent.open.flags;
/*
* The 0 argument passed into the create function should one day
* contain the O_EXCL flag if requested. This allows NFSv3 to
* select the appropriate create strategy. Currently open_namei
* does not pass the create flags.
*/
lock_kernel();
nfs_begin_data_update(dir);
inode = NFS_PROTO(dir)->create(dir, &dentry->d_name, &attr, open_flags);
inode = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
nfs_end_data_update(dir);
if (!IS_ERR(inode)) {
d_instantiate(dentry, inode);
......@@ -1438,7 +1382,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto go_ahead;
if (S_ISDIR(new_inode->i_mode))
goto out;
else if (atomic_read(&new_dentry->d_count) > 1) {
else if (atomic_read(&new_dentry->d_count) > 2) {
int err;
/* copy the target dentry's name */
dentry = d_alloc(new_dentry->d_parent,
......@@ -1453,10 +1397,8 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_inode = NULL;
/* instantiate the replacement target */
d_instantiate(new_dentry, NULL);
}
} else if (atomic_read(&new_dentry->d_count) > 1) {
/* dentry still busy? */
if (atomic_read(&new_dentry->d_count) > 1) {
#ifdef NFS_PARANOIA
printk("nfs_rename: target %s/%s busy, d_count=%d\n",
new_dentry->d_parent->d_name.name,
......@@ -1510,7 +1452,7 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs
if (cache->cred != cred
|| time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
|| (NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR))
|| (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS))
return -ENOENT;
memcpy(res, cache, sizeof(*res));
return 0;
......@@ -1524,6 +1466,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
if (cache->cred)
put_rpccred(cache->cred);
cache->cred = get_rpccred(set->cred);
NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS;
}
cache->jiffies = set->jiffies;
cache->mask = set->mask;
......
......@@ -33,6 +33,7 @@
* 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy
* 08 Jun 2003 Port to 2.5 APIs --cel
* 31 Mar 2004 Handle direct I/O without VFS support --cel
* 15 Sep 2004 Parallel async reads --cel
*
*/
......@@ -43,6 +44,7 @@
#include <linux/smp_lock.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/kref.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
......@@ -50,11 +52,27 @@
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/atomic.h>
#define NFSDBG_FACILITY NFSDBG_VFS
#define VERF_SIZE (2 * sizeof(__u32))
#define MAX_DIRECTIO_SIZE (4096UL << PAGE_SHIFT)
static kmem_cache_t *nfs_direct_cachep;
/*
* This represents a set of asynchronous requests that we're waiting on
*/
struct nfs_direct_req {
struct kref kref; /* release manager */
struct list_head list; /* nfs_read_data structs */
wait_queue_head_t wait; /* wait for i/o completion */
struct page ** pages; /* pages in our buffer */
unsigned int npages; /* count of pages */
atomic_t complete, /* i/os we're waiting for */
count, /* bytes actually processed */
error; /* any reported error */
};
/**
* nfs_get_user_pages - find and set up pages underlying user's buffer
......@@ -71,7 +89,8 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
unsigned long page_count;
size_t array_size;
/* set an arbitrary limit to prevent arithmetic overflow */
/* set an arbitrary limit to prevent type overflow */
/* XXX: this can probably be as large as INT_MAX */
if (size > MAX_DIRECTIO_SIZE)
return -EFBIG;
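With the usual 4 KiB pages (PAGE_SHIFT = 12), this limit works out to 4096UL << 12 = 16 MiB per I/O segment.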
......@@ -93,6 +112,8 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
/**
* nfs_free_user_pages - tear down page struct array
* @pages: array of page struct pointers underlying target buffer
* @npages: number of pages in the array
* @do_dirty: dirty the pages as we release them
*/
static void
nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
......@@ -107,77 +128,231 @@ nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
}
/**
* nfs_direct_read_seg - Read in one iov segment. Generate separate
* read RPCs for each "rsize" bytes.
* nfs_direct_req_release - release nfs_direct_req structure for direct read
* @kref: kref object embedded in an nfs_direct_req structure
*
*/
static void nfs_direct_req_release(struct kref *kref)
{
struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
kmem_cache_free(nfs_direct_cachep, dreq);
}
/**
* nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
* @count: count of bytes for the read request
* @rsize: local rsize setting
*
* Note we also set the number of requests we have in the dreq when we are
* done. This prevents races with I/O completion so we will always wait
* until all requests have been dispatched and completed.
*/
static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
{
struct list_head *list;
struct nfs_direct_req *dreq;
unsigned int reads = 0;
dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
if (!dreq)
return NULL;
kref_init(&dreq->kref);
init_waitqueue_head(&dreq->wait);
INIT_LIST_HEAD(&dreq->list);
atomic_set(&dreq->count, 0);
atomic_set(&dreq->error, 0);
list = &dreq->list;
for(;;) {
struct nfs_read_data *data = nfs_readdata_alloc();
if (unlikely(!data)) {
while (!list_empty(list)) {
data = list_entry(list->next,
struct nfs_read_data, pages);
list_del(&data->pages);
nfs_readdata_free(data);
}
kref_put(&dreq->kref, nfs_direct_req_release);
return NULL;
}
INIT_LIST_HEAD(&data->pages);
list_add(&data->pages, list);
data->req = (struct nfs_page *) dreq;
reads++;
if (nbytes <= rsize)
break;
nbytes -= rsize;
}
kref_get(&dreq->kref);
atomic_set(&dreq->complete, reads);
return dreq;
}
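In other words the loop allocates ceil(nbytes / rsize) read requests. With illustrative numbers: nbytes = 100000 and rsize = 32768 yields four nfs_read_data structs, three full 32768-byte reads plus a 1696-byte tail, and dreq->complete starts at 4.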
/**
* nfs_direct_read_result - handle a read reply for a direct read request
* @data: address of NFS READ operation control block
* @status: status of this NFS READ operation
*
* We must hold a reference to all the pages in this direct read request
* until the RPCs complete. This could be long *after* we are woken up in
* nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
*/
static void nfs_direct_read_result(struct nfs_read_data *data, int status)
{
struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
if (likely(status >= 0))
atomic_add(data->res.count, &dreq->count);
else
atomic_set(&dreq->error, status);
if (unlikely(atomic_dec_and_test(&dreq->complete))) {
nfs_free_user_pages(dreq->pages, dreq->npages, 1);
wake_up(&dreq->wait);
kref_put(&dreq->kref, nfs_direct_req_release);
}
}
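The kref traffic above is easiest to follow as a skeleton (an illustrative summary, not code from the patch):

	/*
	 * nfs_direct_read_alloc():  kref_init()  -> refcount 1 (waiter)
	 *                           kref_get()   -> refcount 2 (I/O side)
	 * nfs_direct_read_result(): on the last completion,
	 *                           kref_put()   -> refcount 1
	 * nfs_direct_read_wait():   kref_put()   -> refcount 0, struct
	 *                           freed by nfs_direct_req_release()
	 *
	 * Either put may happen first, so whichever side finishes last
	 * does the kmem_cache_free(); neither may touch dreq afterwards.
	 */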
/**
* nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
* @dreq: address of nfs_direct_req struct for this request
* @inode: target inode
* @ctx: target file open context
* user_addr: starting address of this segment of user's buffer
* count: size of this segment
* file_offset: offset in file to begin the operation
* @pages: array of addresses of page structs defining user's buffer
* nr_pages: size of pages array
* @user_addr: starting address of this segment of user's buffer
* @count: size of this segment
* @file_offset: offset in file to begin the operation
*
* For each nfs_read_data struct that was allocated on the list, dispatch
* an NFS READ operation
*/
static int
nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx,
unsigned long user_addr, size_t count, loff_t file_offset,
struct page **pages, int nr_pages)
static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
struct inode *inode, struct nfs_open_context *ctx,
unsigned long user_addr, size_t count, loff_t file_offset)
{
const unsigned int rsize = NFS_SERVER(inode)->rsize;
int tot_bytes = 0;
int curpage = 0;
struct nfs_read_data rdata = {
.inode = inode,
.cred = ctx->cred,
.args = {
.fh = NFS_FH(inode),
.context = ctx,
},
.res = {
.fattr = &rdata.fattr,
},
};
struct list_head *list = &dreq->list;
struct page **pages = dreq->pages;
unsigned int curpage, pgbase;
unsigned int rsize = NFS_SERVER(inode)->rsize;
rdata.args.pgbase = user_addr & ~PAGE_MASK;
rdata.args.offset = file_offset;
curpage = 0;
pgbase = user_addr & ~PAGE_MASK;
do {
int result;
rdata.args.count = count;
if (rdata.args.count > rsize)
rdata.args.count = rsize;
rdata.args.pages = &pages[curpage];
dprintk("NFS: direct read: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
rdata.args.count, (long long) rdata.args.offset,
user_addr + tot_bytes, rdata.args.pgbase, curpage);
struct nfs_read_data *data;
unsigned int bytes;
bytes = rsize;
if (count < rsize)
bytes = count;
data = list_entry(list->next, struct nfs_read_data, pages);
list_del_init(&data->pages);
data->inode = inode;
data->cred = ctx->cred;
data->args.fh = NFS_FH(inode);
data->args.context = ctx;
data->args.offset = file_offset;
data->args.pgbase = pgbase;
data->args.pages = &pages[curpage];
data->args.count = bytes;
data->res.fattr = &data->fattr;
data->res.eof = 0;
data->res.count = bytes;
NFS_PROTO(inode)->read_setup(data);
data->task.tk_cookie = (unsigned long) inode;
data->task.tk_calldata = data;
data->task.tk_release = nfs_readdata_release;
data->complete = nfs_direct_read_result;
lock_kernel();
result = NFS_PROTO(inode)->read(&rdata);
rpc_execute(&data->task);
unlock_kernel();
if (result <= 0) {
if (tot_bytes > 0)
break;
if (result == -EISDIR)
result = -EINVAL;
return result;
}
dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
bytes,
(unsigned long long)data->args.offset);
tot_bytes += result;
if (rdata.res.eof)
break;
file_offset += bytes;
pgbase += bytes;
curpage += pgbase >> PAGE_SHIFT;
pgbase &= ~PAGE_MASK;
rdata.args.offset += result;
rdata.args.pgbase += result;
curpage += rdata.args.pgbase >> PAGE_SHIFT;
rdata.args.pgbase &= ~PAGE_MASK;
count -= result;
count -= bytes;
} while (count != 0);
}
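A worked example of the page-cursor arithmetic in the loop above, assuming PAGE_SHIFT = 12 (4 KiB pages): a buffer that starts 1000 bytes into its first page begins with pgbase = 1000; after a 4096-byte read, pgbase becomes 5096, curpage advances by 5096 >> 12 = 1, and pgbase is masked back to 5096 & (PAGE_SIZE - 1) = 1000 within the new current page.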
/**
* nfs_direct_read_wait - wait for I/O completion for direct reads
* @dreq: request on which we are to wait
* @intr: whether or not this wait can be interrupted
*
* Collects and returns the final error value/byte-count.
*/
static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
{
int result = 0;
/* XXX: should we zero the rest of the user's buffer if we
* hit eof? */
if (intr) {
result = wait_event_interruptible(dreq->wait,
(atomic_read(&dreq->complete) == 0));
} else {
wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
}
return tot_bytes;
if (!result)
result = atomic_read(&dreq->error);
if (!result)
result = atomic_read(&dreq->count);
kref_put(&dreq->kref, nfs_direct_req_release);
return (ssize_t) result;
}
/**
* nfs_direct_read_seg - Read in one iov segment. Generate separate
* read RPCs for each "rsize" bytes.
* @inode: target inode
* @ctx: target file open context
* @user_addr: starting address of this segment of user's buffer
* @count: size of this segment
* @file_offset: offset in file to begin the operation
* @pages: array of addresses of page structs defining user's buffer
* @nr_pages: number of pages in the array
*
*/
static ssize_t nfs_direct_read_seg(struct inode *inode,
struct nfs_open_context *ctx, unsigned long user_addr,
size_t count, loff_t file_offset, struct page **pages,
unsigned int nr_pages)
{
ssize_t result;
sigset_t oldset;
struct rpc_clnt *clnt = NFS_CLIENT(inode);
struct nfs_direct_req *dreq;
dreq = nfs_direct_read_alloc(count, NFS_SERVER(inode)->rsize);
if (!dreq)
return -ENOMEM;
dreq->pages = pages;
dreq->npages = nr_pages;
rpc_clnt_sigmask(clnt, &oldset);
nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
file_offset);
result = nfs_direct_read_wait(dreq, clnt->cl_intr);
rpc_clnt_sigunmask(clnt, &oldset);
return result;
}
/**
......@@ -189,9 +364,8 @@ nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx,
* file_offset: offset in file to begin the operation
* nr_segs: size of iovec array
*
* generic_file_direct_IO has already pushed out any non-direct
* writes so that this read will see them when we read from the
* server.
* We've already pushed out any non-direct writes so that this read
* will see them when we read from the server.
*/
static ssize_t
nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
......@@ -220,8 +394,6 @@ nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
result = nfs_direct_read_seg(inode, ctx, user_addr, size,
file_offset, pages, page_count);
nfs_free_user_pages(pages, page_count, 1);
if (result <= 0) {
if (tot_bytes > 0)
break;
......@@ -247,31 +419,31 @@ nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
* @pages: array of addresses of page structs defining user's buffer
* nr_pages: size of pages array
*/
static int
nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
unsigned long user_addr, size_t count, loff_t file_offset,
struct page **pages, int nr_pages)
static ssize_t nfs_direct_write_seg(struct inode *inode,
struct nfs_open_context *ctx, unsigned long user_addr,
size_t count, loff_t file_offset, struct page **pages,
int nr_pages)
{
const unsigned int wsize = NFS_SERVER(inode)->wsize;
size_t request;
int curpage, need_commit, result, tot_bytes;
int curpage, need_commit;
ssize_t result, tot_bytes;
struct nfs_writeverf first_verf;
struct nfs_write_data wdata = {
.inode = inode,
.cred = ctx->cred,
.args = {
.fh = NFS_FH(inode),
.context = ctx,
},
.res = {
.fattr = &wdata.fattr,
.verf = &wdata.verf,
},
};
struct nfs_write_data *wdata;
wdata = nfs_writedata_alloc();
if (!wdata)
return -ENOMEM;
wdata.args.stable = NFS_UNSTABLE;
wdata->inode = inode;
wdata->cred = ctx->cred;
wdata->args.fh = NFS_FH(inode);
wdata->args.context = ctx;
wdata->args.stable = NFS_UNSTABLE;
if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
wdata.args.stable = NFS_FILE_SYNC;
wdata->args.stable = NFS_FILE_SYNC;
wdata->res.fattr = &wdata->fattr;
wdata->res.verf = &wdata->verf;
nfs_begin_data_update(inode);
retry:
......@@ -279,20 +451,20 @@ nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
tot_bytes = 0;
curpage = 0;
request = count;
wdata.args.pgbase = user_addr & ~PAGE_MASK;
wdata.args.offset = file_offset;
wdata->args.pgbase = user_addr & ~PAGE_MASK;
wdata->args.offset = file_offset;
do {
wdata.args.count = request;
if (wdata.args.count > wsize)
wdata.args.count = wsize;
wdata.args.pages = &pages[curpage];
wdata->args.count = request;
if (wdata->args.count > wsize)
wdata->args.count = wsize;
wdata->args.pages = &pages[curpage];
dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
wdata.args.count, (long long) wdata.args.offset,
user_addr + tot_bytes, wdata.args.pgbase, curpage);
wdata->args.count, (long long) wdata->args.offset,
user_addr + tot_bytes, wdata->args.pgbase, curpage);
lock_kernel();
result = NFS_PROTO(inode)->write(&wdata);
result = NFS_PROTO(inode)->write(wdata);
unlock_kernel();
if (result <= 0) {
......@@ -302,20 +474,25 @@ nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
}
if (tot_bytes == 0)
memcpy(&first_verf.verifier, &wdata.verf.verifier,
VERF_SIZE);
if (wdata.verf.committed != NFS_FILE_SYNC) {
memcpy(&first_verf.verifier, &wdata->verf.verifier,
sizeof(first_verf.verifier));
if (wdata->verf.committed != NFS_FILE_SYNC) {
need_commit = 1;
if (memcmp(&first_verf.verifier,
&wdata.verf.verifier, VERF_SIZE))
if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
sizeof(first_verf.verifier)))
goto sync_retry;
}
tot_bytes += result;
wdata.args.offset += result;
wdata.args.pgbase += result;
curpage += wdata.args.pgbase >> PAGE_SHIFT;
wdata.args.pgbase &= ~PAGE_MASK;
/* in case of a short write: stop now, let the app recover */
if (result < wdata->args.count)
break;
wdata->args.offset += result;
wdata->args.pgbase += result;
curpage += wdata->args.pgbase >> PAGE_SHIFT;
wdata->args.pgbase &= ~PAGE_MASK;
request -= result;
} while (request != 0);
......@@ -323,27 +500,27 @@ nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
* Commit data written so far, even in the event of an error
*/
if (need_commit) {
wdata.args.count = tot_bytes;
wdata.args.offset = file_offset;
wdata->args.count = tot_bytes;
wdata->args.offset = file_offset;
lock_kernel();
result = NFS_PROTO(inode)->commit(&wdata);
result = NFS_PROTO(inode)->commit(wdata);
unlock_kernel();
if (result < 0 || memcmp(&first_verf.verifier,
&wdata.verf.verifier,
VERF_SIZE) != 0)
&wdata->verf.verifier,
sizeof(first_verf.verifier)) != 0)
goto sync_retry;
}
result = tot_bytes;
out:
nfs_end_data_update_defer(inode);
nfs_writedata_free(wdata);
return result;
sync_retry:
wdata.args.stable = NFS_FILE_SYNC;
wdata->args.stable = NFS_FILE_SYNC;
goto retry;
}
......@@ -360,9 +537,9 @@ nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx,
* that non-direct readers might access, so they will pick up these
* writes immediately.
*/
static int nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx,
const struct iovec *iov, loff_t file_offset,
unsigned long nr_segs)
static ssize_t nfs_direct_write(struct inode *inode,
struct nfs_open_context *ctx, const struct iovec *iov,
loff_t file_offset, unsigned long nr_segs)
{
ssize_t tot_bytes = 0;
unsigned long seg = 0;
......@@ -501,6 +678,8 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
if (mapping->nrpages) {
retval = filemap_fdatawrite(mapping);
if (retval == 0)
retval = nfs_wb_all(inode);
if (retval == 0)
retval = filemap_fdatawait(mapping);
if (retval)
......@@ -590,6 +769,8 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
if (mapping->nrpages) {
retval = filemap_fdatawrite(mapping);
if (retval == 0)
retval = nfs_wb_all(inode);
if (retval == 0)
retval = filemap_fdatawait(mapping);
if (retval)
......@@ -605,3 +786,21 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
out:
return retval;
}
int nfs_init_directcache(void)
{
nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
sizeof(struct nfs_direct_req),
0, SLAB_RECLAIM_ACCOUNT,
NULL, NULL);
if (nfs_direct_cachep == NULL)
return -ENOMEM;
return 0;
}
void nfs_destroy_directcache(void)
{
if (kmem_cache_destroy(nfs_direct_cachep))
printk(KERN_INFO "nfs_direct_cache: not all structures were freed\n");
}
......@@ -295,10 +295,19 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
int status;
int status = 0;
lock_kernel();
/* Use local locking if mounted with "-onolock" */
if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
else {
struct file_lock *cfl = posix_test_lock(filp, fl);
fl->fl_type = F_UNLCK;
if (cfl != NULL)
memcpy(fl, cfl, sizeof(*fl));
}
unlock_kernel();
return status;
}
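Worth noting about the fallback path: posix_test_lock() consults only this client's local lock table, so under "-onolock" an F_GETLK reports F_UNLCK unless a conflicting lock exists on the same host; locks held by other NFS clients are invisible, which is the accepted trade-off of local-only locking.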
......@@ -325,7 +334,11 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
* still need to complete the unlock.
*/
lock_kernel();
/* Use local locking if mounted with "-onolock" */
if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
else
status = posix_lock_file_wait(filp, fl);
rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
return status;
}
......@@ -351,6 +364,8 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
return status;
lock_kernel();
/* Use local locking if mounted with "-onolock" */
if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) {
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
/* If we were signalled we still need to ensure that
* we clean up any state on the server. We therefore
......@@ -360,6 +375,8 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
*/
if (status == -EINTR || status == -ERESTARTSYS)
posix_lock_file(filp, fl);
} else
status = posix_lock_file_wait(filp, fl);
unlock_kernel();
if (status < 0)
return status;
......@@ -396,15 +413,6 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
return -ENOLCK;
if (NFS_PROTO(inode)->version != 4) {
/* Fake OK code if mounted without NLM support */
if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) {
if (IS_GETLK(cmd))
return LOCK_USE_CLNT;
return 0;
}
}
/*
* No BSD flocks over NFS allowed.
* Note: we could try to fake a POSIX lock request here by
......
......@@ -486,13 +486,27 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf)
if (error < 0)
goto out_err;
buf->f_frsize = server->wtmult;
/*
* Current versions of glibc do not correctly handle the
* case where f_frsize != f_bsize. Eventually we want to
* report the value of wtmult in this field.
*/
buf->f_frsize = sb->s_blocksize;
/*
* On most *nix systems, f_blocks, f_bfree, and f_bavail
* are reported in units of f_frsize. Linux hasn't had
* an f_frsize field in its statfs struct until recently,
* thus historically Linux's sys_statfs reports these
* fields in units of f_bsize.
*/
buf->f_bsize = sb->s_blocksize;
blockbits = sb->s_blocksize_bits;
blockres = (1 << blockbits) - 1;
buf->f_blocks = (res.tbytes + blockres) >> blockbits;
buf->f_bfree = (res.fbytes + blockres) >> blockbits;
buf->f_bavail = (res.abytes + blockres) >> blockbits;
buf->f_files = res.tfiles;
buf->f_ffree = res.afiles;
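A worked example of the rounding (illustrative numbers): with s_blocksize = 4096, blockbits = 12 and blockres = 4095, a server reporting res.tbytes = 10000 gives f_blocks = (10000 + 4095) >> 12 = 3, i.e. the shift rounds partial blocks up instead of truncating them.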
......@@ -565,9 +579,9 @@ nfs_zap_caches(struct inode *inode)
memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
else
nfsi->flags |= NFS_INO_INVALID_ATTR;
nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
}
/*
......@@ -605,7 +619,7 @@ nfs_find_actor(struct inode *inode, void *opaque)
return 0;
if (nfs_compare_fh(NFS_FH(inode), fh))
return 0;
if (is_bad_inode(inode))
if (is_bad_inode(inode) || NFS_STALE(inode))
return 0;
return 1;
}
......@@ -766,13 +780,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
vmtruncate(inode, attr->ia_size);
}
}
if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred;
if (*cred) {
put_rpccred(*cred);
*cred = NULL;
}
}
if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS;
nfs_end_data_update(inode);
unlock_kernel();
return error;
......@@ -949,14 +958,14 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
lock_kernel();
if (!inode || is_bad_inode(inode))
goto out_nowait;
if (NFS_STALE(inode) && inode != inode->i_sb->s_root->d_inode)
if (NFS_STALE(inode))
goto out_nowait;
while (NFS_REVALIDATING(inode)) {
status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING);
if (status < 0)
goto out_nowait;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOAC)
if (NFS_ATTRTIMEO(inode) == 0)
continue;
if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME))
continue;
......@@ -968,14 +977,14 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
/* Protect against RPC races by saving the change attribute */
verifier = nfs_save_change_attribute(inode);
status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
if (status) {
if (status != 0) {
dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
inode->i_sb->s_id,
(long long)NFS_FILEID(inode), status);
if (status == -ESTALE) {
nfs_zap_caches(inode);
if (!S_ISDIR(inode->i_mode))
NFS_FLAGS(inode) |= NFS_INO_STALE;
if (inode != inode->i_sb->s_root->d_inode)
remove_inode_hash(inode);
}
goto out;
}
......@@ -1014,7 +1023,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
inode->i_sb->s_id,
(long long)NFS_FILEID(inode));
NFS_FLAGS(inode) &= ~NFS_INO_STALE;
out:
NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING;
wake_up(&nfsi->nfs_i_wait);
......@@ -1161,7 +1169,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
|| inode->i_uid != fattr->uid
|| inode->i_gid != fattr->gid)
nfsi->flags |= NFS_INO_INVALID_ATTR;
nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
/* Has the link count changed? */
if (inode->i_nlink != fattr->nlink)
......@@ -1270,7 +1278,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
#endif
nfsi->change_attr = fattr->change_attr;
if (!data_unstable)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS;
}
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
......@@ -1278,14 +1286,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
inode->i_uid != fattr->uid ||
inode->i_gid != fattr->gid) {
struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred;
if (*cred) {
put_rpccred(*cred);
*cred = NULL;
}
invalid |= NFS_INO_INVALID_ATTR;
}
inode->i_gid != fattr->gid)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS;
inode->i_mode = fattr->mode;
inode->i_nlink = fattr->nlink;
......@@ -1335,7 +1337,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign
*/
nfs_invalidate_inode(inode);
out_err:
return -EIO;
NFS_FLAGS(inode) |= NFS_INO_STALE;
return -ESTALE;
}
/*
......@@ -1449,8 +1452,6 @@ static void nfs_kill_super(struct super_block *s)
kill_anon_super(s);
nfs4_renewd_prepare_shutdown(server);
if (server->client != NULL && !IS_ERR(server->client))
rpc_shutdown_client(server->client);
if (server->client_sys != NULL && !IS_ERR(server->client_sys))
......@@ -1461,8 +1462,6 @@ static void nfs_kill_super(struct super_block *s)
rpciod_down(); /* release rpciod */
destroy_nfsv4_state(server);
if (server->hostname != NULL)
kfree(server->hostname);
kfree(server);
......@@ -1543,9 +1542,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
server->wsize = nfs_block_size(data->wsize, NULL);
server->flags = data->flags & NFS_MOUNT_FLAGMASK;
/* NFSv4 doesn't use NLM locking */
server->flags |= NFS_MOUNT_NONLM;
server->acregmin = data->acregmin*HZ;
server->acregmax = data->acregmax*HZ;
server->acdirmin = data->acdirmin*HZ;
......@@ -1790,8 +1786,22 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
static void nfs4_kill_super(struct super_block *sb)
{
struct nfs_server *server = NFS_SB(sb);
nfs_return_all_delegations(sb);
nfs_kill_super(sb);
kill_anon_super(sb);
nfs4_renewd_prepare_shutdown(server);
if (server->client != NULL && !IS_ERR(server->client))
rpc_shutdown_client(server->client);
rpciod_down(); /* release rpciod */
destroy_nfsv4_state(server);
if (server->hostname != NULL)
kfree(server->hostname);
kfree(server);
}
static struct file_system_type nfs4_fs_type = {
......@@ -1821,9 +1831,13 @@ static struct file_system_type nfs4_fs_type = {
extern int nfs_init_nfspagecache(void);
extern void nfs_destroy_nfspagecache(void);
extern int nfs_init_readpagecache(void);
extern int nfs_destroy_readpagecache(void);
extern void nfs_destroy_readpagecache(void);
extern int nfs_init_writepagecache(void);
extern int nfs_destroy_writepagecache(void);
extern void nfs_destroy_writepagecache(void);
#ifdef CONFIG_NFS_DIRECTIO
extern int nfs_init_directcache(void);
extern void nfs_destroy_directcache(void);
#endif
static kmem_cache_t * nfs_inode_cachep;
......@@ -1904,6 +1918,12 @@ static int __init init_nfs_fs(void)
if (err)
goto out1;
#ifdef CONFIG_NFS_DIRECTIO
err = nfs_init_directcache();
if (err)
goto out0;
#endif
#ifdef CONFIG_PROC_FS
rpc_proc_register(&nfs_rpcstat);
#endif
......@@ -1914,8 +1934,14 @@ static int __init init_nfs_fs(void)
goto out;
return 0;
out:
#ifdef CONFIG_PROC_FS
rpc_proc_unregister("nfs");
#endif
nfs_destroy_writepagecache();
#ifdef CONFIG_NFS_DIRECTIO
out0:
nfs_destroy_directcache();
#endif
out1:
nfs_destroy_readpagecache();
out2:
......@@ -1928,6 +1954,9 @@ static int __init init_nfs_fs(void)
static void __exit exit_nfs_fs(void)
{
#ifdef CONFIG_NFS_DIRECTIO
nfs_destroy_directcache();
#endif
nfs_destroy_writepagecache();
nfs_destroy_readpagecache();
nfs_destroy_inodecache();
......
......@@ -80,10 +80,10 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
dprintk("%s: call fsinfo\n", __FUNCTION__);
info->fattr->valid = 0;
status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
dprintk("%s: reply fsinfo %d\n", __FUNCTION__, status);
dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
dprintk("%s: reply getattr %d\n", __FUNCTION__, status);
dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
}
return status;
}
......@@ -101,7 +101,7 @@ nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
fattr->valid = 0;
status = rpc_call(server->client, NFS3PROC_GETATTR,
fhandle, fattr, 0);
dprintk("NFS reply getattr\n");
dprintk("NFS reply getattr: %d\n", status);
return status;
}
......@@ -119,7 +119,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
dprintk("NFS call setattr\n");
fattr->valid = 0;
status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
dprintk("NFS reply setattr\n");
dprintk("NFS reply setattr: %d\n", status);
return status;
}
......@@ -198,7 +198,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE))
entry->mask |= MAY_EXEC;
}
dprintk("NFS reply access, status = %d\n", status);
dprintk("NFS reply access: %d\n", status);
return status;
}
......@@ -296,7 +296,7 @@ static int nfs3_proc_commit(struct nfs_write_data *cdata)
* For now, we don't implement O_EXCL.
*/
static struct inode *
nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags)
{
struct nfs_fh fhandle;
......@@ -304,8 +304,8 @@ nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
struct nfs_fattr dir_attr;
struct nfs3_createargs arg = {
.fh = NFS_FH(dir),
.name = name->name,
.len = name->len,
.name = dentry->d_name.name,
.len = dentry->d_name.len,
.sattr = sattr,
};
struct nfs3_diropres res = {
......@@ -315,7 +315,7 @@ nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
};
int status;
dprintk("NFS call create %s\n", name->name);
dprintk("NFS call create %s\n", dentry->d_name.name);
arg.createmode = NFS3_CREATE_UNCHECKED;
if (flags & O_EXCL) {
arg.createmode = NFS3_CREATE_EXCLUSIVE;
......@@ -353,7 +353,7 @@ nfs3_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
if (status != 0)
goto out;
if (fhandle.size == 0 || !(fattr.valid & NFS_ATTR_FATTR)) {
status = nfs3_proc_lookup(dir, name, &fhandle, &fattr);
status = nfs3_proc_lookup(dir, &dentry->d_name, &fhandle, &fattr);
if (status != 0)
goto out;
}
......
......@@ -477,7 +477,7 @@ static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, st
/*
* Returns an nfs4_state + an referenced inode
*/
static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
{
struct nfs4_state_owner *sp;
struct nfs4_state *state = NULL;
......@@ -491,7 +491,7 @@ static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct
struct nfs_openargs o_arg = {
.fh = NFS_FH(dir),
.open_flags = flags,
.name = name,
.name = &dentry->d_name,
.server = server,
.bitmask = server->attr_bitmask,
.claim = NFS4_OPEN_CLAIM_NULL,
......@@ -581,14 +581,14 @@ static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct
}
struct nfs4_state *nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred)
struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
{
struct nfs4_exception exception = { };
struct nfs4_state *res;
int status;
do {
status = _nfs4_do_open(dir, name, flags, sattr, cred, &res);
status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
if (status == 0)
break;
/* NOTE: BAD_SEQID means the server and client disagree about the
......@@ -635,6 +635,8 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
fattr->valid = 0;
if (state != NULL)
msg.rpc_cred = state->owner->so_cred;
if (sattr->ia_valid & ATTR_SIZE)
nfs4_copy_stateid(&arg.stateid, state, NULL);
else
......@@ -658,113 +660,100 @@ int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
return err;
}
/*
* It is possible for data to be read/written from a mem-mapped file
* after the sys_close call (which hits the vfs layer as a flush).
* This means that we can't safely call nfsv4 close on a file until
* the inode is cleared. This in turn means that we are not good
* NFSv4 citizens - we do not indicate to the server to update the file's
* share state even when we are done with one of the three share
* stateid's in the inode.
*
* NOTE: Caller must be holding the sp->so_owner semaphore!
*/
static int _nfs4_do_close(struct inode *inode, struct nfs4_state *state)
{
struct nfs4_state_owner *sp = state->owner;
int status = 0;
struct nfs_closeargs arg = {
.fh = NFS_FH(inode),
};
struct nfs4_closedata {
struct inode *inode;
struct nfs4_state *state;
struct nfs_closeargs arg;
struct nfs_closeres res;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
.rpc_argp = &arg,
.rpc_resp = &res,
};
};
if (test_bit(NFS_DELEGATED_STATE, &state->flags))
return 0;
memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid));
/* Serialization for the sequence id */
arg.seqid = sp->so_seqid,
status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, RPC_TASK_NOINTR);
static void nfs4_close_done(struct rpc_task *task)
{
struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
struct nfs4_state *state = calldata->state;
struct nfs4_state_owner *sp = state->owner;
struct nfs_server *server = NFS_SERVER(calldata->inode);
/* hmm. we are done with the inode, and in the process of freeing
* the state_owner. we keep this around to process errors
*/
nfs4_increment_seqid(status, sp);
if (!status)
memcpy(&state->stateid, &res.stateid, sizeof(state->stateid));
return status;
}
int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
{
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_exception exception = { };
int err;
do {
err = _nfs4_do_close(inode, state);
switch (err) {
nfs4_increment_seqid(task->tk_status, sp);
switch (task->tk_status) {
case 0:
state->state = calldata->arg.open_flags;
memcpy(&state->stateid, &calldata->res.stateid,
sizeof(state->stateid));
break;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
state->state = calldata->arg.open_flags;
nfs4_schedule_state_recovery(server->nfs4_state);
err = 0;
break;
default:
state->state = 0;
if (nfs4_async_handle_error(task, server) == -EAGAIN) {
rpc_restart_call(task);
return;
}
err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
return err;
}
nfs4_put_open_state(state);
up(&sp->so_sema);
nfs4_put_state_owner(sp);
up_read(&server->nfs4_state->cl_sem);
kfree(calldata);
}
static int _nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode)
static inline int nfs4_close_call(struct rpc_clnt *clnt, struct nfs4_closedata *calldata)
{
struct nfs4_state_owner *sp = state->owner;
int status = 0;
struct nfs_closeargs arg = {
.fh = NFS_FH(inode),
.seqid = sp->so_seqid,
.open_flags = mode,
};
struct nfs_closeres res;
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE],
.rpc_argp = &arg,
.rpc_resp = &res,
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
.rpc_argp = &calldata->arg,
.rpc_resp = &calldata->res,
.rpc_cred = calldata->state->owner->so_cred,
};
if (test_bit(NFS_DELEGATED_STATE, &state->flags))
return 0;
memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid));
status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, RPC_TASK_NOINTR);
nfs4_increment_seqid(status, sp);
if (!status)
memcpy(&state->stateid, &res.stateid, sizeof(state->stateid));
return status;
if (calldata->arg.open_flags != 0)
msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
return rpc_call_async(clnt, &msg, 0, nfs4_close_done, calldata);
}
int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode)
/*
* It is possible for data to be read/written from a mem-mapped file
* after the sys_close call (which hits the vfs layer as a flush).
* This means that we can't safely call nfsv4 close on a file until
* the inode is cleared. This in turn means that we are not good
* NFSv4 citizens - we do not indicate to the server to update the file's
* share state even when we are done with one of the three share
* stateid's in the inode.
*
* NOTE: Caller must be holding the sp->so_owner semaphore!
*/
int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode)
{
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_exception exception = { };
int err;
do {
err = _nfs4_do_downgrade(inode, state, mode);
switch (err) {
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
nfs4_schedule_state_recovery(server->nfs4_state);
err = 0;
default:
struct nfs4_closedata *calldata;
int status;
/* Tell caller we're done */
if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
state->state = mode;
return 0;
}
err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
return err;
calldata = (struct nfs4_closedata *)kmalloc(sizeof(*calldata), GFP_KERNEL);
if (calldata == NULL)
return -ENOMEM;
calldata->inode = inode;
calldata->state = state;
calldata->arg.fh = NFS_FH(inode);
/* Serialization for the sequence id */
calldata->arg.seqid = state->owner->so_seqid;
calldata->arg.open_flags = mode;
memcpy(&calldata->arg.stateid, &state->stateid,
sizeof(calldata->arg.stateid));
status = nfs4_close_call(NFS_SERVER(inode)->client, calldata);
/*
* Return -EINPROGRESS on success in order to indicate to the
* caller that an asynchronous RPC call has been launched, and
* that it will release the semaphores on completion.
*/
return (status == 0) ? -EINPROGRESS : status;
}
struct inode *
......@@ -785,7 +774,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
}
cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred);
state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
put_rpccred(cred);
if (IS_ERR(state))
return (struct inode *)state;
......@@ -802,7 +791,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags)
cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
if (IS_ERR(state))
state = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred);
state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
put_rpccred(cred);
if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0)
return 1;
......@@ -1026,7 +1015,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
FMODE_WRITE, cred);
if (IS_ERR(state))
state = nfs4_do_open(dentry->d_parent->d_inode,
&dentry->d_name, FMODE_WRITE,
dentry, FMODE_WRITE,
NULL, cred);
need_iput = 1;
}
......@@ -1327,7 +1316,7 @@ static int nfs4_proc_commit(struct nfs_write_data *cdata)
*/
static struct inode *
nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags)
{
struct inode *inode;
......@@ -1335,7 +1324,7 @@ nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
struct rpc_cred *cred;
cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
state = nfs4_do_open(dir, name, flags, sattr, cred);
state = nfs4_do_open(dir, dentry, flags, sattr, cred);
put_rpccred(cred);
if (!IS_ERR(state)) {
inode = state->inode;
......
......@@ -445,7 +445,7 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
state->owner = owner;
atomic_inc(&owner->so_count);
list_add(&state->inode_states, &nfsi->open_states);
state->inode = inode;
state->inode = igrab(inode);
spin_unlock(&inode->i_lock);
} else {
spin_unlock(&inode->i_lock);
......@@ -471,6 +471,7 @@ void nfs4_put_open_state(struct nfs4_state *state)
list_del(&state->inode_states);
spin_unlock(&inode->i_lock);
list_del(&state->open_states);
iput(inode);
BUG_ON (state->state != 0);
nfs4_free_open_state(state);
nfs4_put_state_owner(owner);
......@@ -486,7 +487,6 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
struct nfs4_state_owner *owner = state->owner;
struct nfs4_client *clp = owner->so_client;
int newstate;
int status = 0;
atomic_inc(&owner->so_count);
down_read(&clp->cl_sem);
......@@ -508,10 +508,8 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
newstate |= FMODE_WRITE;
if (state->state == newstate)
goto out;
if (newstate != 0)
status = nfs4_do_downgrade(inode, state, newstate);
else
status = nfs4_do_close(inode, state);
if (nfs4_do_close(inode, state, newstate) == -EINPROGRESS)
return;
}
out:
nfs4_put_open_state(state);
......
......@@ -63,12 +63,12 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
dprintk("%s: call getattr\n", __FUNCTION__);
fattr->valid = 0;
status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
dprintk("%s: reply getattr %d\n", __FUNCTION__, status);
dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
if (status)
return status;
dprintk("%s: call statfs\n", __FUNCTION__);
status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
dprintk("%s: reply statfs %d\n", __FUNCTION__, status);
dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
if (status)
return status;
info->rtmax = NFS_MAXDATA;
......@@ -96,7 +96,7 @@ nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
fattr->valid = 0;
status = rpc_call(server->client, NFSPROC_GETATTR,
fhandle, fattr, 0);
dprintk("NFS reply getattr\n");
dprintk("NFS reply getattr: %d\n", status);
return status;
}
......@@ -114,7 +114,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
dprintk("NFS call setattr\n");
fattr->valid = 0;
status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
dprintk("NFS reply setattr\n");
dprintk("NFS reply setattr: %d\n", status);
return status;
}
......@@ -213,15 +213,15 @@ static int nfs_proc_write(struct nfs_write_data *wdata)
}
static struct inode *
nfs_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags)
{
struct nfs_fh fhandle;
struct nfs_fattr fattr;
struct nfs_createargs arg = {
.fh = NFS_FH(dir),
.name = name->name,
.len = name->len,
.name = dentry->d_name.name,
.len = dentry->d_name.len,
.sattr = sattr
};
struct nfs_diropok res = {
......@@ -231,7 +231,7 @@ nfs_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
int status;
fattr.valid = 0;
dprintk("NFS call create %s\n", name->name);
dprintk("NFS call create %s\n", dentry->d_name.name);
status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
dprintk("NFS reply create: %d\n", status);
if (status == 0) {
......
......@@ -24,7 +24,6 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/mempool.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
......@@ -39,25 +38,11 @@ static void nfs_readpage_result_partial(struct nfs_read_data *, int);
static void nfs_readpage_result_full(struct nfs_read_data *, int);
static kmem_cache_t *nfs_rdata_cachep;
static mempool_t *nfs_rdata_mempool;
mempool_t *nfs_rdata_mempool;
#define MIN_POOL_READ (32)
static struct nfs_read_data *nfs_readdata_alloc(void)
{
struct nfs_read_data *p;
p = (struct nfs_read_data *)mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
if (p)
memset(p, 0, sizeof(*p));
return p;
}
static __inline__ void nfs_readdata_free(struct nfs_read_data *p)
{
mempool_free(p, nfs_rdata_mempool);
}
static void nfs_readdata_release(struct rpc_task *task)
void nfs_readdata_release(struct rpc_task *task)
{
struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
nfs_readdata_free(data);
......
......@@ -215,7 +215,6 @@ nfs_complete_unlink(struct dentry *dentry)
spin_lock(&dentry->d_lock);
dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
spin_unlock(&dentry->d_lock);
if (data->task.tk_rpcwait == &nfs_delete_queue)
rpc_wake_up_task(&data->task);
nfs_put_unlinkdata(data);
}
......@@ -61,7 +61,6 @@
#include <linux/nfs_page.h>
#include <asm/uaccess.h>
#include <linux/smp_lock.h>
#include <linux/mempool.h>
#include "delegation.h"
......@@ -83,49 +82,17 @@ static int nfs_wait_on_write_congestion(struct address_space *, int);
static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
static kmem_cache_t *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
static mempool_t *nfs_commit_mempool;
mempool_t *nfs_wdata_mempool;
mempool_t *nfs_commit_mempool;
static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
{
struct nfs_write_data *p;
p = (struct nfs_write_data *)mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
}
return p;
}
static __inline__ void nfs_writedata_free(struct nfs_write_data *p)
{
mempool_free(p, nfs_wdata_mempool);
}
static void nfs_writedata_release(struct rpc_task *task)
void nfs_writedata_release(struct rpc_task *task)
{
struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata;
nfs_writedata_free(wdata);
}
static __inline__ struct nfs_write_data *nfs_commit_alloc(void)
{
struct nfs_write_data *p;
p = (struct nfs_write_data *)mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
}
return p;
}
static __inline__ void nfs_commit_free(struct nfs_write_data *p)
{
mempool_free(p, nfs_commit_mempool);
}
/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
......@@ -184,11 +151,10 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
int result, written = 0;
struct nfs_write_data *wdata;
wdata = kmalloc(sizeof(*wdata), GFP_NOFS);
wdata = nfs_writedata_alloc();
if (!wdata)
return -ENOMEM;
memset(wdata, 0, sizeof(*wdata));
wdata->flags = how;
wdata->cred = ctx->cred;
wdata->inode = inode;
......@@ -238,8 +204,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
io_error:
nfs_end_data_update_defer(inode);
kfree(wdata);
nfs_writedata_free(wdata);
return written ? written : result;
}
......@@ -1199,7 +1164,8 @@ void nfs_writeback_done(struct rpc_task *task)
}
if (time_before(complain, jiffies)) {
printk(KERN_WARNING
"NFS: Server wrote less than requested.\n");
"NFS: Server wrote zero bytes, expected %u.\n",
argp->count);
complain = jiffies + 300 * HZ;
}
/* Can't do anything about it except throw an error. */
......
......@@ -199,6 +199,7 @@ static inline int dname_external(struct dentry *dentry)
* These are the low-level FS interfaces to the dcache..
*/
extern void d_instantiate(struct dentry *, struct inode *);
extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
extern void d_delete(struct dentry *);
/* allocate/de-allocate */
......@@ -242,6 +243,23 @@ static inline void d_add(struct dentry *entry, struct inode *inode)
d_rehash(entry);
}
/**
* d_add_unique - add dentry to hash queues without aliasing
* @entry: dentry to add
* @inode: The inode to attach to this dentry
*
 * This adds the entry to the hash queues and initializes @inode.
 * The entry was actually filled in earlier during d_alloc(). On
 * success it returns NULL; if an unhashed alias of @entry already
 * exists, that alias is rehashed and returned instead, with a
 * reference held.
*/
static inline struct dentry *d_add_unique(struct dentry *entry, struct inode *inode)
{
struct dentry *res;
res = d_instantiate_unique(entry, inode);
d_rehash(res != NULL ? res : entry);
return res;
}
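An illustrative use, not part of this commit: an atomic-open-style path that allocated its own candidate dentry could call d_add_unique() as sketched below, switching to the returned alias (which comes back referenced and rehashed) whenever one exists. The helper name is hypothetical.
static struct dentry *sketch_instantiate(struct dentry *candidate,
					 struct inode *inode)
{
	struct dentry *alias;
	alias = d_add_unique(candidate, inode);
	if (alias != NULL) {
		dput(candidate);	/* candidate was ours; drop it */
		return alias;		/* referenced, rehashed alias */
	}
	return candidate;
}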
/* used for rename() and baskets */
extern void d_move(struct dentry *, struct dentry *);
......
......@@ -1189,11 +1189,6 @@ extern long do_mount(char *, char *, char *, unsigned long, void *);
extern int vfs_statfs(struct super_block *, struct kstatfs *);
/* Return value for VFS lock functions - tells locks.c to lock conventionally
* REALLY kosha for root NFS and nfs_lock
*/
#define LOCK_USE_CLNT 1
#define FLOCK_VERIFY_READ 1
#define FLOCK_VERIFY_WRITE 2
......
......@@ -30,6 +30,7 @@
#include <linux/nfs_xdr.h>
#include <linux/rwsem.h>
#include <linux/workqueue.h>
#include <linux/mempool.h>
/*
* Enable debugging support for nfs client.
......@@ -201,6 +202,7 @@ struct nfs_inode {
#define NFS_INO_INVALID_ATTR 0x0008 /* cached attrs are invalid */
#define NFS_INO_INVALID_DATA 0x0010 /* cached data is invalid */
#define NFS_INO_INVALID_ATIME 0x0020 /* cached atime is invalid */
#define NFS_INO_INVALID_ACCESS 0x0040 /* cached access cred invalid */
static inline struct nfs_inode *NFS_I(struct inode *inode)
{
......@@ -239,7 +241,7 @@ static inline int nfs_caches_unstable(struct inode *inode)
static inline void NFS_CACHEINV(struct inode *inode)
{
if (!nfs_caches_unstable(inode))
NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR;
NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS;
}
static inline int nfs_server_capable(struct inode *inode, int cap)
......@@ -424,6 +426,44 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page)
return nfs_wb_page_priority(inode, page, 0);
}
/*
* Allocate and free nfs_write_data structures
*/
extern mempool_t *nfs_wdata_mempool;
extern mempool_t *nfs_commit_mempool;
static inline struct nfs_write_data *nfs_writedata_alloc(void)
{
struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
}
return p;
}
static inline void nfs_writedata_free(struct nfs_write_data *p)
{
mempool_free(p, nfs_wdata_mempool);
}
extern void nfs_writedata_release(struct rpc_task *task);
static inline struct nfs_write_data *nfs_commit_alloc(void)
{
struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
}
return p;
}
static inline void nfs_commit_free(struct nfs_write_data *p)
{
mempool_free(p, nfs_commit_mempool);
}
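A minimal lifecycle sketch (hypothetical helper, not in this commit): synchronous callers pair nfs_writedata_alloc() with nfs_writedata_free(), while asynchronous RPCs free the buffer from nfs_writedata_release() when the task completes.
static int sketch_sync_write_buffer(void)
{
	struct nfs_write_data *wdata;
	wdata = nfs_writedata_alloc();	/* mempool-backed, may sleep */
	if (wdata == NULL)
		return -ENOMEM;
	/* ... fill in wdata and perform a synchronous write ... */
	nfs_writedata_free(wdata);
	return 0;
}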
/* Hack for future NFS swap support */
#ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode) (0)
......@@ -438,6 +478,26 @@ extern int nfs_readpages(struct file *, struct address_space *,
extern int nfs_pagein_list(struct list_head *, int);
extern void nfs_readpage_result(struct rpc_task *);
/*
* Allocate and free nfs_read_data structures
*/
extern mempool_t *nfs_rdata_mempool;
static inline struct nfs_read_data *nfs_readdata_alloc(void)
{
struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
if (p)
memset(p, 0, sizeof(*p));
return p;
}
static inline void nfs_readdata_free(struct nfs_read_data *p)
{
mempool_free(p, nfs_rdata_mempool);
}
extern void nfs_readdata_release(struct rpc_task *task);
/*
* linux/fs/mount_clnt.c
* (Used only by nfsroot module)
......@@ -651,8 +711,7 @@ extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *);
extern int nfs4_proc_async_renew(struct nfs4_client *);
extern int nfs4_proc_renew(struct nfs4_client *);
extern int nfs4_do_close(struct inode *, struct nfs4_state *);
extern int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode);
extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *);
extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
......
......@@ -681,7 +681,7 @@ struct nfs_rpc_ops {
int (*read) (struct nfs_read_data *);
int (*write) (struct nfs_write_data *);
int (*commit) (struct nfs_write_data *);
struct inode * (*create) (struct inode *, struct qstr *,
struct inode * (*create) (struct inode *, struct dentry *,
struct iattr *, int);
int (*remove) (struct inode *, struct qstr *);
int (*unlink_setup) (struct rpc_message *,
......
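A hedged sketch of a caller after this interface change: ->create() now receives the dentry itself rather than a bare qstr, so implementations can reach d_name and eventually instantiate the dentry directly. The wrapper below is hypothetical; NFS_PROTO() is assumed to be the usual accessor for this ops table.
static struct inode *sketch_create(struct inode *dir, struct dentry *dentry,
				   struct iattr *sattr, int flags)
{
	return NFS_PROTO(dir)->create(dir, dentry, sattr, flags);
}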
......@@ -51,7 +51,6 @@ struct rpc_cred {
};
#define RPCAUTH_CRED_LOCKED 0x0001
#define RPCAUTH_CRED_UPTODATE 0x0002
#define RPCAUTH_CRED_DEAD 0x0004
#define RPCAUTH_CRED_MAGIC 0x0f4aa4f0
......@@ -131,7 +130,6 @@ int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
int rpcauth_refreshcred(struct rpc_task *);
void rpcauth_invalcred(struct rpc_task *);
int rpcauth_uptodatecred(struct rpc_task *);
int rpcauth_deadcred(struct rpc_task *);
void rpcauth_init_credcache(struct rpc_auth *);
void rpcauth_free_credcache(struct rpc_auth *);
......
......@@ -11,7 +11,9 @@
#include <linux/timer.h>
#include <linux/sunrpc/types.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <linux/sunrpc/xdr.h>
/*
......@@ -25,11 +27,18 @@ struct rpc_message {
struct rpc_cred * rpc_cred; /* Credentials */
};
struct rpc_wait_queue;
struct rpc_wait {
struct list_head list; /* wait queue links */
struct list_head links; /* Links to related tasks */
wait_queue_head_t waitq; /* sync: sleep on this q */
struct rpc_wait_queue * rpc_waitq; /* RPC wait queue we're on */
};
/*
* This is the RPC task struct
*/
struct rpc_task {
struct list_head tk_list; /* wait queue links */
#ifdef RPC_DEBUG
unsigned long tk_magic; /* 0xf00baa */
#endif
......@@ -37,7 +46,6 @@ struct rpc_task {
struct rpc_clnt * tk_client; /* RPC client */
struct rpc_rqst * tk_rqstp; /* RPC request */
int tk_status; /* result of last operation */
struct rpc_wait_queue * tk_rpcwait; /* RPC wait queue we're on */
/*
* RPC call state
......@@ -70,13 +78,18 @@ struct rpc_task {
* you have a pathological interest in kernel oopses.
*/
struct timer_list tk_timer; /* kernel timer */
wait_queue_head_t tk_wait; /* sync: sleep on this q */
unsigned long tk_timeout; /* timeout for rpc_sleep() */
unsigned short tk_flags; /* misc flags */
unsigned char tk_active : 1;/* Task has been activated */
unsigned char tk_priority : 2;/* Task priority */
unsigned long tk_runstate; /* Task run status */
struct list_head tk_links; /* links to related tasks */
struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could
* be any workqueue
*/
union {
struct work_struct tk_work; /* Async task work queue */
struct rpc_wait tk_wait; /* RPC wait */
} u;
#ifdef RPC_DEBUG
unsigned short tk_pid; /* debugging aid */
#endif
......@@ -87,11 +100,11 @@ struct rpc_task {
/* support walking a list of tasks on a wait queue */
#define task_for_each(task, pos, head) \
list_for_each(pos, head) \
if ((task=list_entry(pos, struct rpc_task, tk_list)),1)
if ((task=list_entry(pos, struct rpc_task, u.tk_wait.list)),1)
#define task_for_first(task, head) \
if (!list_empty(head) && \
((task=list_entry((head)->next, struct rpc_task, tk_list)),1))
((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1))
/* .. and walking list of all tasks */
#define alltask_for_each(task, pos, head) \
......@@ -126,22 +139,39 @@ typedef void (*rpc_action)(struct rpc_task *);
#define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT)
#define RPC_TASK_UNINTERRUPTIBLE(t) ((t)->tk_flags & RPC_TASK_NOINTR)
#define RPC_TASK_SLEEPING 0
#define RPC_TASK_RUNNING 1
#define RPC_IS_SLEEPING(t) (test_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate))
#define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
#define RPC_TASK_RUNNING 0
#define RPC_TASK_QUEUED 1
#define RPC_TASK_WAKEUP 2
#define RPC_TASK_HAS_TIMER 3
#define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
#define rpc_set_running(t) (set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
#define rpc_clear_running(t) (clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
#define rpc_test_and_set_running(t) \
(test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
#define rpc_clear_running(t) \
do { \
smp_mb__before_clear_bit(); \
clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \
smp_mb__after_clear_bit(); \
} while (0)
#define rpc_set_sleeping(t) (set_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate))
#define RPC_IS_QUEUED(t) (test_bit(RPC_TASK_QUEUED, &(t)->tk_runstate))
#define rpc_set_queued(t) (set_bit(RPC_TASK_QUEUED, &(t)->tk_runstate))
#define rpc_clear_queued(t) \
do { \
smp_mb__before_clear_bit(); \
clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate); \
smp_mb__after_clear_bit(); \
} while (0)
#define rpc_clear_sleeping(t) \
#define rpc_start_wakeup(t) \
(test_and_set_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate) == 0)
#define rpc_finish_wakeup(t) \
do { \
smp_mb__before_clear_bit(); \
clear_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate); \
clear_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate); \
smp_mb__after_clear_bit(); \
} while(0)
} while (0)
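Taken together, the QUEUED/WAKEUP/RUNNING bits form a small lockless hand-off: only the waker that wins rpc_start_wakeup() may dequeue the task. Sketched below for clarity; it mirrors __rpc_wake_up_task() in net/sunrpc/sched.c further down.
static void sketch_wake(struct rpc_task *task)
{
	if (rpc_start_wakeup(task)) {		/* first waker wins */
		if (RPC_IS_QUEUED(task))
			__rpc_do_wake_up_task(task);	/* dequeue + run */
		rpc_finish_wakeup(task);	/* re-arm for later wakeups */
	}
}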
/*
* Task priorities.
......@@ -157,6 +187,7 @@ typedef void (*rpc_action)(struct rpc_task *);
* RPC synchronization objects
*/
struct rpc_wait_queue {
spinlock_t lock;
struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */
unsigned long cookie; /* cookie of last task serviced */
unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */
......@@ -177,6 +208,7 @@ struct rpc_wait_queue {
#ifndef RPC_DEBUG
# define RPC_WAITQ_INIT(var,qname) { \
.lock = SPIN_LOCK_UNLOCKED, \
.tasks = { \
[0] = LIST_HEAD_INIT(var.tasks[0]), \
[1] = LIST_HEAD_INIT(var.tasks[1]), \
......@@ -185,6 +217,7 @@ struct rpc_wait_queue {
}
#else
# define RPC_WAITQ_INIT(var,qname) { \
.lock = SPIN_LOCK_UNLOCKED, \
.tasks = { \
[0] = LIST_HEAD_INIT(var.tasks[0]), \
[1] = LIST_HEAD_INIT(var.tasks[1]), \
......@@ -209,13 +242,10 @@ void rpc_killall_tasks(struct rpc_clnt *);
int rpc_execute(struct rpc_task *);
void rpc_run_child(struct rpc_task *parent, struct rpc_task *child,
rpc_action action);
int rpc_add_wait_queue(struct rpc_wait_queue *, struct rpc_task *);
void rpc_remove_wait_queue(struct rpc_task *);
void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
void rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
rpc_action action, rpc_action timer);
void rpc_add_timer(struct rpc_task *, rpc_action);
void rpc_wake_up_task(struct rpc_task *);
void rpc_wake_up(struct rpc_wait_queue *);
struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
......
......@@ -214,8 +214,6 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
list_for_each_safe(pos, next, &auth->au_credcache[nr]) {
struct rpc_cred *entry;
entry = list_entry(pos, struct rpc_cred, cr_hash);
if (entry->cr_flags & RPCAUTH_CRED_DEAD)
continue;
if (rpcauth_prune_expired(entry, &free))
continue;
if (entry->cr_ops->crmatch(acred, entry, taskflags)) {
......@@ -307,9 +305,6 @@ put_rpccred(struct rpc_cred *cred)
if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
return;
if ((cred->cr_flags & RPCAUTH_CRED_DEAD) && !list_empty(&cred->cr_hash))
list_del_init(&cred->cr_hash);
if (list_empty(&cred->cr_hash)) {
spin_unlock(&rpc_credcache_lock);
rpcauth_crdestroy(cred);
......@@ -413,10 +408,3 @@ rpcauth_uptodatecred(struct rpc_task *task)
return !(task->tk_msg.rpc_cred) ||
(task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE);
}
int
rpcauth_deadcred(struct rpc_task *task)
{
return !(task->tk_msg.rpc_cred) ||
(task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_DEAD);
}
......@@ -480,12 +480,14 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (!cred)
goto err;
if (gss_err)
cred->cr_flags |= RPCAUTH_CRED_DEAD;
cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
else
gss_cred_set_ctx(cred, ctx);
spin_lock(&gss_auth->lock);
gss_msg = __gss_find_upcall(gss_auth, acred.uid);
if (gss_msg) {
if (gss_err)
gss_msg->msg.errno = -EACCES;
__gss_unhash_msg(gss_msg);
spin_unlock(&gss_auth->lock);
gss_release_msg(gss_msg);
......@@ -740,7 +742,9 @@ gss_marshal(struct rpc_task *task, u32 *p, int ruid)
maj_stat = gss_get_mic(ctx->gc_gss_ctx,
GSS_C_QOP_DEFAULT,
&verf_buf, &mic);
if(maj_stat != 0){
if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
} else if (maj_stat != 0) {
printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
goto out_put_ctx;
}
......@@ -779,6 +783,7 @@ gss_validate(struct rpc_task *task, u32 *p)
struct xdr_netobj mic;
u32 flav,len;
u32 service;
u32 maj_stat;
dprintk("RPC: %4u gss_validate\n", task->tk_pid);
......@@ -794,7 +799,10 @@ gss_validate(struct rpc_task *task, u32 *p)
mic.data = (u8 *)p;
mic.len = len;
if (gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state))
maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
if (maj_stat)
goto out_bad;
service = gss_pseudoflavor_to_service(ctx->gc_gss_ctx->mech_type,
gss_cred->gc_flavor);
......@@ -821,11 +829,10 @@ gss_validate(struct rpc_task *task, u32 *p)
}
static inline int
gss_wrap_req_integ(struct gss_cl_ctx *ctx,
kxdrproc_t encode, void *rqstp, u32 *p, void *obj)
gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
kxdrproc_t encode, struct rpc_rqst *rqstp, u32 *p, void *obj)
{
struct rpc_rqst *req = (struct rpc_rqst *)rqstp;
struct xdr_buf *snd_buf = &req->rq_snd_buf;
struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
struct xdr_buf integ_buf;
u32 *integ_len = NULL;
struct xdr_netobj mic;
......@@ -836,7 +843,7 @@ gss_wrap_req_integ(struct gss_cl_ctx *ctx,
integ_len = p++;
offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
*p++ = htonl(req->rq_seqno);
*p++ = htonl(rqstp->rq_seqno);
status = encode(rqstp, p, obj);
if (status)
......@@ -858,7 +865,9 @@ gss_wrap_req_integ(struct gss_cl_ctx *ctx,
maj_stat = gss_get_mic(ctx->gc_gss_ctx,
GSS_C_QOP_DEFAULT, &integ_buf, &mic);
status = -EIO; /* XXX? */
if (maj_stat)
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
else if (maj_stat)
return status;
q = xdr_encode_opaque(p, NULL, mic.len);
......@@ -894,7 +903,8 @@ gss_wrap_req(struct rpc_task *task,
status = encode(rqstp, p, obj);
goto out;
case RPC_GSS_SVC_INTEGRITY:
status = gss_wrap_req_integ(ctx, encode, rqstp, p, obj);
status = gss_wrap_req_integ(cred, ctx, encode,
rqstp, p, obj);
goto out;
case RPC_GSS_SVC_PRIVACY:
default:
......@@ -907,11 +917,10 @@ gss_wrap_req(struct rpc_task *task,
}
static inline int
gss_unwrap_resp_integ(struct gss_cl_ctx *ctx,
kxdrproc_t decode, void *rqstp, u32 **p, void *obj)
gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
struct rpc_rqst *rqstp, u32 **p)
{
struct rpc_rqst *req = (struct rpc_rqst *)rqstp;
struct xdr_buf *rcv_buf = &req->rq_rcv_buf;
struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
struct xdr_buf integ_buf;
struct xdr_netobj mic;
u32 data_offset, mic_offset;
......@@ -926,7 +935,7 @@ gss_unwrap_resp_integ(struct gss_cl_ctx *ctx,
mic_offset = integ_len + data_offset;
if (mic_offset > rcv_buf->len)
return status;
if (ntohl(*(*p)++) != req->rq_seqno)
if (ntohl(*(*p)++) != rqstp->rq_seqno)
return status;
if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset,
......@@ -938,6 +947,8 @@ gss_unwrap_resp_integ(struct gss_cl_ctx *ctx,
maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf,
&mic, NULL);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
if (maj_stat != GSS_S_COMPLETE)
return status;
return 0;
......@@ -962,8 +973,7 @@ gss_unwrap_resp(struct rpc_task *task,
case RPC_GSS_SVC_NONE:
goto out_decode;
case RPC_GSS_SVC_INTEGRITY:
status = gss_unwrap_resp_integ(ctx, decode,
rqstp, &p, obj);
status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p);
if (status)
goto out;
break;
......
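The recurring pattern in these auth_gss.c hunks, sketched in isolation (helper name hypothetical): an expired GSS context no longer marks the credential DEAD; it merely clears UPTODATE so the next call refreshes the credential instead of failing outright.
static void sketch_gss_check_expiry(struct rpc_cred *cred, u32 maj_stat)
{
	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
}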
......@@ -928,7 +928,7 @@ call_refreshresult(struct rpc_task *task)
task->tk_action = call_reserve;
if (status >= 0 && rpcauth_uptodatecred(task))
return;
if (rpcauth_deadcred(task)) {
if (status == -EACCES) {
rpc_exit(task, -EACCES);
return;
}
......@@ -970,23 +970,31 @@ call_verify(struct rpc_task *task)
struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
u32 *p = iov->iov_base, n;
int error = -EACCES;
if ((len -= 3) < 0)
goto garbage;
goto out_overflow;
p += 1; /* skip XID */
if ((n = ntohl(*p++)) != RPC_REPLY) {
printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n);
goto garbage;
goto out_retry;
}
if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
int error = -EACCES;
if (--len < 0)
goto garbage;
if ((n = ntohl(*p++)) != RPC_AUTH_ERROR) {
printk(KERN_WARNING "call_verify: RPC call rejected: %x\n", n);
} else if (--len < 0)
goto out_overflow;
switch ((n = ntohl(*p++))) {
case RPC_AUTH_ERROR:
break;
case RPC_MISMATCH:
printk(KERN_WARNING "%s: RPC call version mismatch!\n", __FUNCTION__);
goto out_eio;
default:
printk(KERN_WARNING "%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n);
goto out_eio;
}
if (--len < 0)
goto out_overflow;
switch ((n = ntohl(*p++))) {
case RPC_AUTH_REJECTEDCRED:
case RPC_AUTH_REJECTEDVERF:
......@@ -1017,20 +1025,18 @@ call_verify(struct rpc_task *task)
default:
printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n);
error = -EIO;
} else
goto garbage;
}
dprintk("RPC: %4d call_verify: call rejected %d\n",
task->tk_pid, n);
rpc_exit(task, error);
return NULL;
goto out_err;
}
if (!(p = rpcauth_checkverf(task, p))) {
printk(KERN_WARNING "call_verify: auth check failed\n");
goto garbage; /* bad verifier, retry */
goto out_retry; /* bad verifier, retry */
}
len = p - (u32 *)iov->iov_base - 1;
if (len < 0)
goto garbage;
goto out_overflow;
switch ((n = ntohl(*p++))) {
case RPC_SUCCESS:
return p;
......@@ -1053,23 +1059,28 @@ call_verify(struct rpc_task *task)
task->tk_client->cl_server);
goto out_eio;
case RPC_GARBAGE_ARGS:
dprintk("RPC: %4d %s: server saw garbage\n", task->tk_pid, __FUNCTION__);
break; /* retry */
default:
printk(KERN_WARNING "call_verify: server accept status: %x\n", n);
/* Also retry */
}
garbage:
dprintk("RPC: %4d call_verify: server saw garbage\n", task->tk_pid);
out_retry:
task->tk_client->cl_stats->rpcgarbage++;
if (task->tk_garb_retry) {
task->tk_garb_retry--;
dprintk(KERN_WARNING "RPC: garbage, retrying %4d\n", task->tk_pid);
dprintk(KERN_WARNING "RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid);
task->tk_action = call_bind;
return NULL;
}
printk(KERN_WARNING "RPC: garbage, exit EIO\n");
printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__);
out_eio:
rpc_exit(task, -EIO);
error = -EIO;
out_err:
rpc_exit(task, error);
return NULL;
out_overflow:
printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__);
goto out_retry;
}
......@@ -25,6 +25,7 @@
#ifdef RPC_DEBUG
#define RPCDBG_FACILITY RPCDBG_SCHED
#define RPC_TASK_MAGIC_ID 0xf00baa
static int rpc_task_id;
#endif
......@@ -41,16 +42,9 @@ static mempool_t *rpc_buffer_mempool;
static void __rpc_default_timer(struct rpc_task *task);
static void rpciod_killall(void);
static void rpc_free(struct rpc_task *task);
/*
* When an asynchronous RPC task is activated within a bottom half
* handler, or while executing another RPC task, it is put on
* schedq, and rpciod is woken up.
*/
static RPC_WAITQ(schedq, "schedq");
static void rpc_async_schedule(void *);
/*
* RPC tasks that create another task (e.g. for contacting the portmapper)
......@@ -71,18 +65,10 @@ static LIST_HEAD(all_tasks);
/*
* rpciod-related stuff
*/
static DECLARE_WAIT_QUEUE_HEAD(rpciod_idle);
static DECLARE_COMPLETION(rpciod_killer);
static DECLARE_MUTEX(rpciod_sema);
static unsigned int rpciod_users;
static pid_t rpciod_pid;
static int rpc_inhibit;
static struct workqueue_struct *rpciod_workqueue;
/*
* Spinlock for wait queues. Access to the latter also has to be
* interrupt-safe in order to allow timers to wake up sleeping tasks.
*/
static spinlock_t rpc_queue_lock = SPIN_LOCK_UNLOCKED;
/*
* Spinlock for other critical sections of code.
*/
......@@ -90,7 +76,7 @@ static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED;
/*
* Disable the timer for a given RPC task. Should be called with
* rpc_queue_lock and bh_disabled in order to avoid races within
* queue->lock and bh_disabled in order to avoid races within
* rpc_run_timer().
*/
static inline void
......@@ -108,19 +94,19 @@ __rpc_disable_timer(struct rpc_task *task)
* without calling del_timer_sync(). The latter could cause a
* deadlock if called while we're holding spinlocks...
*/
static void
rpc_run_timer(struct rpc_task *task)
static void rpc_run_timer(struct rpc_task *task)
{
void (*callback)(struct rpc_task *);
spin_lock_bh(&rpc_queue_lock);
callback = task->tk_timeout_fn;
task->tk_timeout_fn = NULL;
spin_unlock_bh(&rpc_queue_lock);
if (callback) {
if (callback && RPC_IS_QUEUED(task)) {
dprintk("RPC: %4d running timer\n", task->tk_pid);
callback(task);
}
smp_mb__before_clear_bit();
clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
smp_mb__after_clear_bit();
}
/*
......@@ -139,29 +125,21 @@ __rpc_add_timer(struct rpc_task *task, rpc_action timer)
task->tk_timeout_fn = timer;
else
task->tk_timeout_fn = __rpc_default_timer;
set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
mod_timer(&task->tk_timer, jiffies + task->tk_timeout);
}
/*
* Set up a timer for an already sleeping task.
*/
void rpc_add_timer(struct rpc_task *task, rpc_action timer)
{
spin_lock_bh(&rpc_queue_lock);
if (!RPC_IS_RUNNING(task))
__rpc_add_timer(task, timer);
spin_unlock_bh(&rpc_queue_lock);
}
/*
* Delete any timer for the current task. Because we use del_timer_sync(),
* this function should never be called while holding rpc_queue_lock.
* this function should never be called while holding queue->lock.
*/
static inline void
rpc_delete_timer(struct rpc_task *task)
{
if (del_timer_sync(&task->tk_timer))
if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) {
del_singleshot_timer_sync(&task->tk_timer);
dprintk("RPC: %4d deleting timer\n", task->tk_pid);
}
}
/*
......@@ -172,16 +150,17 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r
struct list_head *q;
struct rpc_task *t;
INIT_LIST_HEAD(&task->u.tk_wait.links);
q = &queue->tasks[task->tk_priority];
if (unlikely(task->tk_priority > queue->maxpriority))
q = &queue->tasks[queue->maxpriority];
list_for_each_entry(t, q, tk_list) {
list_for_each_entry(t, q, u.tk_wait.list) {
if (t->tk_cookie == task->tk_cookie) {
list_add_tail(&task->tk_list, &t->tk_links);
list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
return;
}
}
list_add_tail(&task->tk_list, q);
list_add_tail(&task->u.tk_wait.list, q);
}
/*
......@@ -192,37 +171,21 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r
* improve overall performance.
* Everyone else gets appended to the queue to ensure proper FIFO behavior.
*/
static int __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
{
if (task->tk_rpcwait == queue)
return 0;
BUG_ON (RPC_IS_QUEUED(task));
if (task->tk_rpcwait) {
printk(KERN_WARNING "RPC: doubly enqueued task!\n");
return -EWOULDBLOCK;
}
if (RPC_IS_PRIORITY(queue))
__rpc_add_wait_queue_priority(queue, task);
else if (RPC_IS_SWAPPER(task))
list_add(&task->tk_list, &queue->tasks[0]);
list_add(&task->u.tk_wait.list, &queue->tasks[0]);
else
list_add_tail(&task->tk_list, &queue->tasks[0]);
task->tk_rpcwait = queue;
list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
task->u.tk_wait.rpc_waitq = queue;
rpc_set_queued(task);
dprintk("RPC: %4d added to queue %p \"%s\"\n",
task->tk_pid, queue, rpc_qname(queue));
return 0;
}
int rpc_add_wait_queue(struct rpc_wait_queue *q, struct rpc_task *task)
{
int result;
spin_lock_bh(&rpc_queue_lock);
result = __rpc_add_wait_queue(q, task);
spin_unlock_bh(&rpc_queue_lock);
return result;
}
/*
......@@ -232,12 +195,12 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
{
struct rpc_task *t;
if (!list_empty(&task->tk_links)) {
t = list_entry(task->tk_links.next, struct rpc_task, tk_list);
list_move(&t->tk_list, &task->tk_list);
list_splice_init(&task->tk_links, &t->tk_links);
if (!list_empty(&task->u.tk_wait.links)) {
t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
}
list_del(&task->tk_list);
list_del(&task->u.tk_wait.list);
}
/*
......@@ -246,31 +209,17 @@ static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
*/
static void __rpc_remove_wait_queue(struct rpc_task *task)
{
struct rpc_wait_queue *queue = task->tk_rpcwait;
if (!queue)
return;
struct rpc_wait_queue *queue;
queue = task->u.tk_wait.rpc_waitq;
if (RPC_IS_PRIORITY(queue))
__rpc_remove_wait_queue_priority(task);
else
list_del(&task->tk_list);
task->tk_rpcwait = NULL;
list_del(&task->u.tk_wait.list);
dprintk("RPC: %4d removed from queue %p \"%s\"\n",
task->tk_pid, queue, rpc_qname(queue));
}
void
rpc_remove_wait_queue(struct rpc_task *task)
{
if (!task->tk_rpcwait)
return;
spin_lock_bh(&rpc_queue_lock);
__rpc_remove_wait_queue(task);
spin_unlock_bh(&rpc_queue_lock);
}
static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
{
queue->priority = priority;
......@@ -293,6 +242,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
{
int i;
spin_lock_init(&queue->lock);
for (i = 0; i < ARRAY_SIZE(queue->tasks); i++)
INIT_LIST_HEAD(&queue->tasks[i]);
queue->maxpriority = maxprio;
......@@ -319,34 +269,31 @@ EXPORT_SYMBOL(rpc_init_wait_queue);
* Note: If the task is ASYNC, this must be called with
* the spinlock held to protect the wait queue operation.
*/
static inline void
rpc_make_runnable(struct rpc_task *task)
static void rpc_make_runnable(struct rpc_task *task)
{
if (task->tk_timeout_fn) {
printk(KERN_ERR "RPC: task w/ running timer in rpc_make_runnable!!\n");
int do_ret;
BUG_ON(task->tk_timeout_fn);
do_ret = rpc_test_and_set_running(task);
rpc_clear_queued(task);
if (do_ret)
return;
}
rpc_set_running(task);
if (RPC_IS_ASYNC(task)) {
if (RPC_IS_SLEEPING(task)) {
int status;
status = __rpc_add_wait_queue(&schedq, task);
INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task);
status = queue_work(task->tk_workqueue, &task->u.tk_work);
if (status < 0) {
printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
task->tk_status = status;
return;
}
rpc_clear_sleeping(task);
wake_up(&rpciod_idle);
}
} else {
rpc_clear_sleeping(task);
wake_up(&task->tk_wait);
}
} else
wake_up(&task->u.tk_wait.waitq);
}
/*
* Place a newly initialized task on the schedq.
* Place a newly initialized task on the workqueue.
*/
static inline void
rpc_schedule_run(struct rpc_task *task)
......@@ -355,33 +302,18 @@ rpc_schedule_run(struct rpc_task *task)
if (RPC_IS_ACTIVATED(task))
return;
task->tk_active = 1;
rpc_set_sleeping(task);
rpc_make_runnable(task);
}
/*
* For other people who may need to wake the I/O daemon
* but should (for now) know nothing about its innards
*/
void rpciod_wake_up(void)
{
if(rpciod_pid==0)
printk(KERN_ERR "rpciod: wot no daemon?\n");
wake_up(&rpciod_idle);
}
/*
* Prepare for sleeping on a wait queue.
* By always appending tasks to the list we ensure FIFO behavior.
* NB: An RPC task will only receive interrupt-driven events as long
* as it's on a wait queue.
*/
static void
__rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_action action, rpc_action timer)
{
int status;
dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid,
rpc_qname(q), jiffies);
......@@ -391,68 +323,47 @@ __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
}
/* Mark the task as being activated if so needed */
if (!RPC_IS_ACTIVATED(task)) {
if (!RPC_IS_ACTIVATED(task))
task->tk_active = 1;
rpc_set_sleeping(task);
}
status = __rpc_add_wait_queue(q, task);
if (status) {
printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
task->tk_status = status;
} else {
rpc_clear_running(task);
if (task->tk_callback) {
dprintk(KERN_ERR "RPC: %4d overwrites an active callback\n", task->tk_pid);
BUG();
}
__rpc_add_wait_queue(q, task);
BUG_ON(task->tk_callback != NULL);
task->tk_callback = action;
__rpc_add_timer(task, timer);
}
}
void
rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
rpc_action action, rpc_action timer)
{
/*
* Protect the queue operations.
*/
spin_lock_bh(&rpc_queue_lock);
spin_lock_bh(&q->lock);
__rpc_sleep_on(q, task, action, timer);
spin_unlock_bh(&rpc_queue_lock);
spin_unlock_bh(&q->lock);
}
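With the global rpc_queue_lock gone, sleep and wake now pair up on the queue's own spinlock. A minimal sketch of the new model, with hypothetical names and NULL action/timer callbacks:
static RPC_WAITQ(sketch_waitq, "sketch");
static void sketch_wait(struct rpc_task *task)
{
	/* takes sketch_waitq.lock internally */
	rpc_sleep_on(&sketch_waitq, task, NULL, NULL);
}
static void sketch_done(void)
{
	rpc_wake_up(&sketch_waitq);	/* also serialized on the queue lock */
}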
/**
* __rpc_wake_up_task - wake up a single rpc_task
* __rpc_do_wake_up_task - wake up a single rpc_task
* @task: task to be woken up
*
* Caller must hold rpc_queue_lock
* Caller must hold queue->lock, and have cleared the task queued flag.
*/
static void
__rpc_wake_up_task(struct rpc_task *task)
static void __rpc_do_wake_up_task(struct rpc_task *task)
{
dprintk("RPC: %4d __rpc_wake_up_task (now %ld inh %d)\n",
task->tk_pid, jiffies, rpc_inhibit);
dprintk("RPC: %4d __rpc_wake_up_task (now %ld)\n", task->tk_pid, jiffies);
#ifdef RPC_DEBUG
if (task->tk_magic != 0xf00baa) {
printk(KERN_ERR "RPC: attempt to wake up non-existing task!\n");
rpc_debug = ~0;
rpc_show_tasks();
return;
}
BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
#endif
/* Has the task been executed yet? If not, we cannot wake it up! */
if (!RPC_IS_ACTIVATED(task)) {
printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
return;
}
if (RPC_IS_RUNNING(task))
return;
__rpc_disable_timer(task);
if (task->tk_rpcwait != &schedq)
__rpc_remove_wait_queue(task);
rpc_make_runnable(task);
......@@ -460,6 +371,18 @@ __rpc_wake_up_task(struct rpc_task *task)
dprintk("RPC: __rpc_wake_up_task done\n");
}
/*
* Wake up the specified task
*/
static void __rpc_wake_up_task(struct rpc_task *task)
{
if (rpc_start_wakeup(task)) {
if (RPC_IS_QUEUED(task))
__rpc_do_wake_up_task(task);
rpc_finish_wakeup(task);
}
}
/*
* Default timeout handler if none specified by user
*/
......@@ -474,14 +397,18 @@ __rpc_default_timer(struct rpc_task *task)
/*
* Wake up the specified task
*/
void
rpc_wake_up_task(struct rpc_task *task)
void rpc_wake_up_task(struct rpc_task *task)
{
if (RPC_IS_RUNNING(task))
return;
spin_lock_bh(&rpc_queue_lock);
__rpc_wake_up_task(task);
spin_unlock_bh(&rpc_queue_lock);
if (rpc_start_wakeup(task)) {
if (RPC_IS_QUEUED(task)) {
struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq;
spin_lock_bh(&queue->lock);
__rpc_do_wake_up_task(task);
spin_unlock_bh(&queue->lock);
}
rpc_finish_wakeup(task);
}
}
/*
......@@ -497,11 +424,11 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu
*/
q = &queue->tasks[queue->priority];
if (!list_empty(q)) {
task = list_entry(q->next, struct rpc_task, tk_list);
task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
if (queue->cookie == task->tk_cookie) {
if (--queue->nr)
goto out;
list_move_tail(&task->tk_list, q);
list_move_tail(&task->u.tk_wait.list, q);
}
/*
* Check if we need to switch queues.
......@@ -519,7 +446,7 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu
else
q = q - 1;
if (!list_empty(q)) {
task = list_entry(q->next, struct rpc_task, tk_list);
task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
goto new_queue;
}
} while (q != &queue->tasks[queue->priority]);
......@@ -544,14 +471,14 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
struct rpc_task *task = NULL;
dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
spin_lock_bh(&rpc_queue_lock);
spin_lock_bh(&queue->lock);
if (RPC_IS_PRIORITY(queue))
task = __rpc_wake_up_next_priority(queue);
else {
task_for_first(task, &queue->tasks[0])
__rpc_wake_up_task(task);
}
spin_unlock_bh(&rpc_queue_lock);
spin_unlock_bh(&queue->lock);
return task;
}
......@@ -560,25 +487,25 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
* rpc_wake_up - wake up all rpc_tasks
* @queue: rpc_wait_queue on which the tasks are sleeping
*
* Grabs rpc_queue_lock
* Grabs queue->lock
*/
void rpc_wake_up(struct rpc_wait_queue *queue)
{
struct rpc_task *task;
struct list_head *head;
spin_lock_bh(&rpc_queue_lock);
spin_lock_bh(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
while (!list_empty(head)) {
task = list_entry(head->next, struct rpc_task, tk_list);
task = list_entry(head->next, struct rpc_task, u.tk_wait.list);
__rpc_wake_up_task(task);
}
if (head == &queue->tasks[0])
break;
head--;
}
spin_unlock_bh(&rpc_queue_lock);
spin_unlock_bh(&queue->lock);
}
/**
......@@ -586,18 +513,18 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
* @queue: rpc_wait_queue on which the tasks are sleeping
* @status: status value to set
*
* Grabs rpc_queue_lock
* Grabs queue->lock
*/
void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
{
struct list_head *head;
struct rpc_task *task;
spin_lock_bh(&rpc_queue_lock);
spin_lock_bh(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
while (!list_empty(head)) {
task = list_entry(head->next, struct rpc_task, tk_list);
task = list_entry(head->next, struct rpc_task, u.tk_wait.list);
task->tk_status = status;
__rpc_wake_up_task(task);
}
......@@ -605,7 +532,7 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
break;
head--;
}
spin_unlock_bh(&rpc_queue_lock);
spin_unlock_bh(&queue->lock);
}
/*
......@@ -629,21 +556,22 @@ __rpc_atrun(struct rpc_task *task)
/*
* This is the RPC `scheduler' (or rather, the finite state machine).
*/
static int
__rpc_execute(struct rpc_task *task)
static int __rpc_execute(struct rpc_task *task)
{
int status = 0;
dprintk("RPC: %4d rpc_execute flgs %x\n",
task->tk_pid, task->tk_flags);
if (!RPC_IS_RUNNING(task)) {
printk(KERN_WARNING "RPC: rpc_execute called for sleeping task!!\n");
return 0;
}
BUG_ON(RPC_IS_QUEUED(task));
restarted:
while (1) {
/*
* Garbage collection of pending timers...
*/
rpc_delete_timer(task);
/*
* Execute any pending callback.
*/
......@@ -660,7 +588,9 @@ __rpc_execute(struct rpc_task *task)
*/
save_callback=task->tk_callback;
task->tk_callback=NULL;
lock_kernel();
save_callback(task);
unlock_kernel();
}
/*
......@@ -668,43 +598,35 @@ __rpc_execute(struct rpc_task *task)
* tk_action may be NULL when the task has been killed
* by someone else.
*/
if (RPC_IS_RUNNING(task)) {
/*
* Garbage collection of pending timers...
*/
rpc_delete_timer(task);
if (!RPC_IS_QUEUED(task)) {
if (!task->tk_action)
break;
lock_kernel();
task->tk_action(task);
/* micro-optimization to avoid spinlock */
if (RPC_IS_RUNNING(task))
continue;
unlock_kernel();
}
/*
* Check whether task is sleeping.
* Lockless check for whether task is sleeping or not.
*/
spin_lock_bh(&rpc_queue_lock);
if (!RPC_IS_RUNNING(task)) {
rpc_set_sleeping(task);
if (!RPC_IS_QUEUED(task))
continue;
rpc_clear_running(task);
if (RPC_IS_ASYNC(task)) {
spin_unlock_bh(&rpc_queue_lock);
/* Careful! we may have raced... */
if (RPC_IS_QUEUED(task))
return 0;
if (rpc_test_and_set_running(task))
return 0;
continue;
}
}
spin_unlock_bh(&rpc_queue_lock);
if (!RPC_IS_SLEEPING(task))
continue;
/* sync task: sleep here */
dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid);
if (current->pid == rpciod_pid)
printk(KERN_ERR "RPC: rpciod waiting on sync task!\n");
if (RPC_TASK_UNINTERRUPTIBLE(task)) {
__wait_event(task->tk_wait, !RPC_IS_SLEEPING(task));
__wait_event(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task));
} else {
__wait_event_interruptible(task->tk_wait, !RPC_IS_SLEEPING(task), status);
__wait_event_interruptible(task->u.tk_wait.waitq, !RPC_IS_QUEUED(task), status);
/*
* When a sync task receives a signal, it exits with
* -ERESTARTSYS. In order to catch any callbacks that
......@@ -718,11 +640,14 @@ __rpc_execute(struct rpc_task *task)
rpc_wake_up_task(task);
}
}
rpc_set_running(task);
dprintk("RPC: %4d sync task resuming\n", task->tk_pid);
}
if (task->tk_exit) {
lock_kernel();
task->tk_exit(task);
unlock_kernel();
/* If tk_action is non-null, the user wants us to restart */
if (task->tk_action) {
if (!RPC_ASSASSINATED(task)) {
......@@ -741,7 +666,6 @@ __rpc_execute(struct rpc_task *task)
/* Release all resources associated with the task */
rpc_release_task(task);
return status;
}
......@@ -757,57 +681,16 @@ __rpc_execute(struct rpc_task *task)
int
rpc_execute(struct rpc_task *task)
{
int status = -EIO;
if (rpc_inhibit) {
printk(KERN_INFO "RPC: execution inhibited!\n");
goto out_release;
}
status = -EWOULDBLOCK;
if (task->tk_active) {
printk(KERN_ERR "RPC: active task was run twice!\n");
goto out_err;
}
BUG_ON(task->tk_active);
task->tk_active = 1;
rpc_set_running(task);
return __rpc_execute(task);
out_release:
rpc_release_task(task);
out_err:
return status;
}
/*
* This is our own little scheduler for async RPC tasks.
*/
static void
__rpc_schedule(void)
static void rpc_async_schedule(void *arg)
{
struct rpc_task *task;
int count = 0;
dprintk("RPC: rpc_schedule enter\n");
while (1) {
task_for_first(task, &schedq.tasks[0]) {
__rpc_remove_wait_queue(task);
spin_unlock_bh(&rpc_queue_lock);
__rpc_execute(task);
spin_lock_bh(&rpc_queue_lock);
} else {
break;
}
if (++count >= 200 || need_resched()) {
count = 0;
spin_unlock_bh(&rpc_queue_lock);
schedule();
spin_lock_bh(&rpc_queue_lock);
}
}
dprintk("RPC: rpc_schedule leave\n");
__rpc_execute((struct rpc_task *)arg);
}
/*
......@@ -865,7 +748,6 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call
task->tk_client = clnt;
task->tk_flags = flags;
task->tk_exit = callback;
init_waitqueue_head(&task->tk_wait);
if (current->uid != current->fsuid || current->gid != current->fsgid)
task->tk_flags |= RPC_TASK_SETUID;
......@@ -876,12 +758,11 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call
task->tk_priority = RPC_PRIORITY_NORMAL;
task->tk_cookie = (unsigned long)current;
INIT_LIST_HEAD(&task->tk_links);
/* Add to global list of all tasks */
spin_lock(&rpc_sched_lock);
list_add(&task->tk_task, &all_tasks);
spin_unlock(&rpc_sched_lock);
/* Initialize workqueue for async tasks */
task->tk_workqueue = rpciod_workqueue;
if (!RPC_IS_ASYNC(task))
init_waitqueue_head(&task->u.tk_wait.waitq);
if (clnt) {
atomic_inc(&clnt->cl_users);
......@@ -892,9 +773,14 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call
}
#ifdef RPC_DEBUG
task->tk_magic = 0xf00baa;
task->tk_magic = RPC_TASK_MAGIC_ID;
task->tk_pid = rpc_task_id++;
#endif
/* Add to global list of all tasks */
spin_lock(&rpc_sched_lock);
list_add_tail(&task->tk_task, &all_tasks);
spin_unlock(&rpc_sched_lock);
dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
current->pid);
}
......@@ -947,18 +833,12 @@ rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
goto out;
}
void
rpc_release_task(struct rpc_task *task)
void rpc_release_task(struct rpc_task *task)
{
dprintk("RPC: %4d release task\n", task->tk_pid);
#ifdef RPC_DEBUG
if (task->tk_magic != 0xf00baa) {
printk(KERN_ERR "RPC: attempt to release a non-existing task!\n");
rpc_debug = ~0;
rpc_show_tasks();
return;
}
BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
#endif
/* Remove from global task list */
......@@ -966,19 +846,9 @@ rpc_release_task(struct rpc_task *task)
list_del(&task->tk_task);
spin_unlock(&rpc_sched_lock);
/* Protect the execution below. */
spin_lock_bh(&rpc_queue_lock);
/* Disable timer to prevent zombie wakeup */
__rpc_disable_timer(task);
/* Remove from any wait queue we're still on */
__rpc_remove_wait_queue(task);
BUG_ON (RPC_IS_QUEUED(task));
task->tk_active = 0;
spin_unlock_bh(&rpc_queue_lock);
/* Synchronously delete any running timer */
rpc_delete_timer(task);
......@@ -1008,10 +878,9 @@ rpc_release_task(struct rpc_task *task)
* queue 'childq'. If so returns a pointer to the parent.
* Upon failure returns NULL.
*
* Caller must hold rpc_queue_lock
* Caller must hold childq.lock
*/
static inline struct rpc_task *
rpc_find_parent(struct rpc_task *child)
static inline struct rpc_task *rpc_find_parent(struct rpc_task *child)
{
struct rpc_task *task, *parent;
struct list_head *le;
......@@ -1024,17 +893,16 @@ rpc_find_parent(struct rpc_task *child)
return NULL;
}
static void
rpc_child_exit(struct rpc_task *child)
static void rpc_child_exit(struct rpc_task *child)
{
struct rpc_task *parent;
spin_lock_bh(&rpc_queue_lock);
spin_lock_bh(&childq.lock);
if ((parent = rpc_find_parent(child)) != NULL) {
parent->tk_status = child->tk_status;
__rpc_wake_up_task(parent);
}
spin_unlock_bh(&rpc_queue_lock);
spin_unlock_bh(&childq.lock);
}
/*
......@@ -1057,22 +925,20 @@ rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
return NULL;
}
void
rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func)
{
spin_lock_bh(&rpc_queue_lock);
spin_lock_bh(&childq.lock);
/* N.B. Is it possible for the child to have already finished? */
__rpc_sleep_on(&childq, task, func, NULL);
rpc_schedule_run(child);
spin_unlock_bh(&rpc_queue_lock);
spin_unlock_bh(&childq.lock);
}
/*
* Kill all tasks for the given client.
* XXX: kill their descendants as well?
*/
void
rpc_killall_tasks(struct rpc_clnt *clnt)
void rpc_killall_tasks(struct rpc_clnt *clnt)
{
struct rpc_task *rovr;
struct list_head *le;
......@@ -1083,104 +949,28 @@ rpc_killall_tasks(struct rpc_clnt *clnt)
* Spin lock all_tasks to prevent changes...
*/
spin_lock(&rpc_sched_lock);
alltask_for_each(rovr, le, &all_tasks)
alltask_for_each(rovr, le, &all_tasks) {
if (! RPC_IS_ACTIVATED(rovr))
continue;
if (!clnt || rovr->tk_client == clnt) {
rovr->tk_flags |= RPC_TASK_KILLED;
rpc_exit(rovr, -EIO);
rpc_wake_up_task(rovr);
}
}
spin_unlock(&rpc_sched_lock);
}
static DECLARE_MUTEX_LOCKED(rpciod_running);
static inline int
rpciod_task_pending(void)
{
return !list_empty(&schedq.tasks[0]);
}
/*
* This is the rpciod kernel thread
*/
static int
rpciod(void *ptr)
{
int rounds = 0;
lock_kernel();
/*
* Let our maker know we're running ...
*/
rpciod_pid = current->pid;
up(&rpciod_running);
daemonize("rpciod");
allow_signal(SIGKILL);
dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid);
spin_lock_bh(&rpc_queue_lock);
while (rpciod_users) {
DEFINE_WAIT(wait);
if (signalled()) {
spin_unlock_bh(&rpc_queue_lock);
rpciod_killall();
flush_signals(current);
spin_lock_bh(&rpc_queue_lock);
}
__rpc_schedule();
if (current->flags & PF_FREEZE) {
spin_unlock_bh(&rpc_queue_lock);
refrigerator(PF_FREEZE);
spin_lock_bh(&rpc_queue_lock);
}
if (++rounds >= 64) { /* safeguard */
spin_unlock_bh(&rpc_queue_lock);
schedule();
rounds = 0;
spin_lock_bh(&rpc_queue_lock);
}
dprintk("RPC: rpciod back to sleep\n");
prepare_to_wait(&rpciod_idle, &wait, TASK_INTERRUPTIBLE);
if (!rpciod_task_pending() && !signalled()) {
spin_unlock_bh(&rpc_queue_lock);
schedule();
rounds = 0;
spin_lock_bh(&rpc_queue_lock);
}
finish_wait(&rpciod_idle, &wait);
dprintk("RPC: switch to rpciod\n");
}
spin_unlock_bh(&rpc_queue_lock);
dprintk("RPC: rpciod shutdown commences\n");
if (!list_empty(&all_tasks)) {
printk(KERN_ERR "rpciod: active tasks at shutdown?!\n");
rpciod_killall();
}
dprintk("RPC: rpciod exiting\n");
unlock_kernel();
rpciod_pid = 0;
complete_and_exit(&rpciod_killer, 0);
return 0;
}
static void
rpciod_killall(void)
static void rpciod_killall(void)
{
unsigned long flags;
while (!list_empty(&all_tasks)) {
clear_thread_flag(TIF_SIGPENDING);
rpc_killall_tasks(NULL);
spin_lock_bh(&rpc_queue_lock);
__rpc_schedule();
spin_unlock_bh(&rpc_queue_lock);
flush_workqueue(rpciod_workqueue);
if (!list_empty(&all_tasks)) {
dprintk("rpciod_killall: waiting for tasks to exit\n");
yield();
......@@ -1198,28 +988,30 @@ rpciod_killall(void)
int
rpciod_up(void)
{
struct workqueue_struct *wq;
int error = 0;
down(&rpciod_sema);
dprintk("rpciod_up: pid %d, users %d\n", rpciod_pid, rpciod_users);
dprintk("rpciod_up: users %d\n", rpciod_users);
rpciod_users++;
if (rpciod_pid)
if (rpciod_workqueue)
goto out;
/*
* If there's no pid, we should be the first user.
*/
if (rpciod_users > 1)
printk(KERN_WARNING "rpciod_up: no pid, %d users??\n", rpciod_users);
printk(KERN_WARNING "rpciod_up: no workqueue, %d users??\n", rpciod_users);
/*
* Create the rpciod thread and wait for it to start.
*/
error = kernel_thread(rpciod, NULL, 0);
if (error < 0) {
printk(KERN_WARNING "rpciod_up: create thread failed, error=%d\n", error);
error = -ENOMEM;
wq = create_workqueue("rpciod");
if (wq == NULL) {
printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error);
rpciod_users--;
goto out;
}
down(&rpciod_running);
rpciod_workqueue = wq;
error = 0;
out:
up(&rpciod_sema);
......@@ -1230,20 +1022,21 @@ void
rpciod_down(void)
{
down(&rpciod_sema);
dprintk("rpciod_down pid %d sema %d\n", rpciod_pid, rpciod_users);
dprintk("rpciod_down sema %d\n", rpciod_users);
if (rpciod_users) {
if (--rpciod_users)
goto out;
} else
printk(KERN_WARNING "rpciod_down: pid=%d, no users??\n", rpciod_pid);
printk(KERN_WARNING "rpciod_down: no users??\n");
if (!rpciod_pid) {
if (!rpciod_workqueue) {
dprintk("rpciod_down: Nothing to do!\n");
goto out;
}
rpciod_killall();
kill_proc(rpciod_pid, SIGKILL, 1);
wait_for_completion(&rpciod_killer);
destroy_workqueue(rpciod_workqueue);
rpciod_workqueue = NULL;
out:
up(&rpciod_sema);
}
......@@ -1261,7 +1054,12 @@ void rpc_show_tasks(void)
}
printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
"-rpcwait -action- --exit--\n");
alltask_for_each(t, le, &all_tasks)
alltask_for_each(t, le, &all_tasks) {
const char *rpc_waitq = "none";
if (RPC_IS_QUEUED(t))
rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n",
t->tk_pid,
(t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
......@@ -1269,8 +1067,9 @@ void rpc_show_tasks(void)
t->tk_client,
(t->tk_client ? t->tk_client->cl_prog : 0),
t->tk_rqstp, t->tk_timeout,
rpc_qname(t->tk_rpcwait),
rpc_waitq,
t->tk_action, t->tk_exit);
}
spin_unlock(&rpc_sched_lock);
}
#endif
......
......@@ -371,6 +371,7 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
do {
/* Are any pointers crossing a page boundary? */
if (pgto_base == 0) {
flush_dcache_page(*pgto);
pgto_base = PAGE_CACHE_SIZE;
pgto--;
}
......@@ -394,6 +395,7 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
kunmap_atomic(vto, KM_USER0);
} while ((len -= copy) != 0);
flush_dcache_page(*pgto);
}
/*
......@@ -427,12 +429,14 @@ _copy_to_pages(struct page **pages, size_t pgbase, const char *p, size_t len)
pgbase += copy;
if (pgbase == PAGE_CACHE_SIZE) {
flush_dcache_page(*pgto);
pgbase = 0;
pgto++;
}
p += copy;
} while ((len -= copy) != 0);
flush_dcache_page(*pgto);
}
/*
......
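The rule these xdr.c hunks enforce, shown as a standalone (hypothetical) helper: after writing into a page-cache page through a kernel mapping, flush the D-cache so user mappings observe the new data on architectures with virtually indexed caches.
static void sketch_copy_into_page(struct page *page, size_t base,
				  const char *src, size_t len)
{
	char *dst = kmap_atomic(page, KM_USER0);
	memcpy(dst + base, src, len);
	kunmap_atomic(dst, KM_USER0);
	flush_dcache_page(page);	/* publish the write */
}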
......@@ -893,7 +893,8 @@ tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc)
xprt->tcp_flags &= ~XPRT_COPY_XID;
xprt->tcp_flags |= XPRT_COPY_DATA;
xprt->tcp_copied = 4;
dprintk("RPC: reading reply for XID %08x\n", xprt->tcp_xid);
dprintk("RPC: reading reply for XID %08x\n",
ntohl(xprt->tcp_xid));
tcp_check_recm(xprt);
}
......@@ -913,7 +914,7 @@ tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
if (!req) {
xprt->tcp_flags &= ~XPRT_COPY_DATA;
dprintk("RPC: XID %08x request not found!\n",
xprt->tcp_xid);
ntohl(xprt->tcp_xid));
spin_unlock(&xprt->sock_lock);
return;
}
......@@ -1103,7 +1104,7 @@ xprt_write_space(struct sock *sk)
goto out;
spin_lock_bh(&xprt->sock_lock);
if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending)
if (xprt->snd_task)
rpc_wake_up_task(xprt->snd_task);
spin_unlock_bh(&xprt->sock_lock);
out:
......@@ -1362,7 +1363,7 @@ xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
req->rq_xprt = xprt;
req->rq_xid = xprt_alloc_xid(xprt);
dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
req, req->rq_xid);
req, ntohl(req->rq_xid));
}
/*
......