Commit ca738a2b authored by Trond Myklebust

[PATCH] Make nfs_find_request() scale

nfs_find_request() needs to be called every time we schedule a write
on the page cache. Currently it walks a per-inode linked list, which
has to be traversed in its entirety whenever we don't already have a
pending write request on the page in question.

The following patch adopts the new radix tree, as already used in the
page cache. The performance change is more or less negligible with the
current hard limit of 256 outstanding write requests per mount.

However, when I remove this limit, the old nfs_find_request() results
in a 50% reduction in speed on my benchmark test (iozone with 4
threads, each writing a 512MB file, on a 512MB Linux client against a
Solaris server over a 100Mbit switched net). With this patch, the same
benchmark instead shows a 50% increase in speed.
parent df43f015
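
The core of the change is easiest to see in isolation. The sketch below shows the insert/lookup/remove pattern the patch adopts, with the page index (wb_index) as the radix tree key. It is illustrative only: the demo_* names are hypothetical stand-ins, and the locking, reference counting and igrab()/iput() handling of the real code are omitted; the actual implementation is in the diff below (nfs_inode_add_request, _nfs_find_request, nfs_inode_remove_request).

```c
/*
 * Illustrative sketch only -- not part of the patch.  The demo_* types
 * and functions are hypothetical; they mirror the pattern the real NFS
 * code adopts below.
 */
#include <linux/radix-tree.h>
#include <linux/errno.h>

struct demo_req {
	unsigned long wb_index;		/* page index, used as the tree key */
};

struct demo_inode {
	struct radix_tree_root page_tree;	/* replaces the per-inode list */
	unsigned int npages;
};

static void demo_init(struct demo_inode *di)
{
	/* GFP_ATOMIC: insertions happen while holding a spinlock */
	INIT_RADIX_TREE(&di->page_tree, GFP_ATOMIC);
	di->npages = 0;
}

static int demo_add_request(struct demo_inode *di, struct demo_req *req)
{
	int error;

	/* Keyed insert; fails with -EEXIST or -ENOMEM instead of
	 * silently appending to a list. */
	error = radix_tree_insert(&di->page_tree, req->wb_index, req);
	if (error)
		return error;
	di->npages++;
	return 0;
}

static struct demo_req *demo_find_request(struct demo_inode *di,
					  unsigned long index)
{
	/* Direct lookup by page index; no list traversal */
	return radix_tree_lookup(&di->page_tree, index);
}

static void demo_remove_request(struct demo_inode *di, struct demo_req *req)
{
	radix_tree_delete(&di->page_tree, req->wb_index);
	di->npages--;
}
```
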
@@ -1559,7 +1559,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
inode_init_once(&nfsi->vfs_inode);
INIT_LIST_HEAD(&nfsi->dirty);
INIT_LIST_HEAD(&nfsi->commit);
INIT_LIST_HEAD(&nfsi->writeback);
INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
nfsi->ndirty = 0;
nfsi->ncommit = 0;
nfsi->npages = 0;
@@ -36,7 +36,6 @@ nfs_page_alloc(void)
p = kmem_cache_alloc(nfs_page_cachep, SLAB_NOFS);
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->wb_hash);
INIT_LIST_HEAD(&p->wb_list);
INIT_LIST_HEAD(&p->wb_lru);
init_waitqueue_head(&p->wb_wait);
@@ -161,14 +160,9 @@ nfs_release_request(struct nfs_page *req)
spin_unlock(&nfs_wreq_lock);
#ifdef NFS_PARANOIA
if (!list_empty(&req->wb_list))
BUG();
if (!list_empty(&req->wb_hash))
BUG();
if (NFS_WBACK_BUSY(req))
BUG();
if (atomic_read(&NFS_REQUESTLIST(req->wb_inode)->nr_requests) < 0)
BUG();
BUG_ON (!list_empty(&req->wb_list));
BUG_ON (NFS_WBACK_BUSY(req));
BUG_ON (atomic_read(&NFS_REQUESTLIST(req->wb_inode)->nr_requests) < 0);
#endif
/* Release struct file or cached credential */
@@ -301,19 +301,21 @@ nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
/*
* Insert a write request into an inode
*/
static inline void
static inline int
nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
struct nfs_inode *nfsi = NFS_I(inode);
if (!list_empty(&req->wb_hash))
return;
if (!NFS_WBACK_BUSY(req))
printk(KERN_ERR "NFS: unlocked request attempted hashed!\n");
if (list_empty(&nfsi->writeback))
int error;
error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
BUG_ON(error == -EEXIST);
if (error)
return error;
if (!nfsi->npages)
igrab(inode);
nfsi->npages++;
list_add(&req->wb_hash, &nfsi->writeback);
req->wb_count++;
return 0;
}
/*
@@ -324,21 +326,14 @@ nfs_inode_remove_request(struct nfs_page *req)
{
struct nfs_inode *nfsi;
struct inode *inode;
BUG_ON (!NFS_WBACK_BUSY(req));
spin_lock(&nfs_wreq_lock);
if (list_empty(&req->wb_hash)) {
spin_unlock(&nfs_wreq_lock);
return;
}
if (!NFS_WBACK_BUSY(req))
printk(KERN_ERR "NFS: unlocked request attempted unhashed!\n");
inode = req->wb_inode;
list_del(&req->wb_hash);
INIT_LIST_HEAD(&req->wb_hash);
nfsi = NFS_I(inode);
radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
nfsi->npages--;
if ((nfsi->npages == 0) != list_empty(&nfsi->writeback))
printk(KERN_ERR "NFS: desynchronized value of nfs_i.npages.\n");
if (list_empty(&nfsi->writeback)) {
if (!nfsi->npages) {
spin_unlock(&nfs_wreq_lock);
iput(inode);
} else
@@ -354,19 +349,12 @@ static inline struct nfs_page *
_nfs_find_request(struct inode *inode, unsigned long index)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct list_head *head, *next;
head = &nfsi->writeback;
next = head->next;
while (next != head) {
struct nfs_page *req = nfs_inode_wb_entry(next);
next = next->next;
if (req->wb_index != index)
continue;
struct nfs_page *req;
req = (struct nfs_page*)radix_tree_lookup(&nfsi->nfs_page_tree, index);
if (req)
req->wb_count++;
return req;
}
return NULL;
return req;
}
static struct nfs_page *
@@ -437,8 +425,8 @@ static int
nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_start, unsigned int npages)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct list_head *p, *head;
unsigned long idx_end;
struct nfs_page *req;
unsigned long idx_end, next;
unsigned int res = 0;
int error;
@@ -448,21 +436,17 @@ nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_s
idx_end = idx_start + npages - 1;
spin_lock(&nfs_wreq_lock);
head = &nfsi->writeback;
p = head->next;
while (p != head) {
struct nfs_page *req = nfs_inode_wb_entry(p);
p = p->next;
next = idx_start;
while (radix_tree_gang_lookup(&nfsi->nfs_page_tree, (void **)&req, next, 1)) {
if (req->wb_index > idx_end)
break;
next = req->wb_index + 1;
if (file && req->wb_file != file)
continue;
if (req->wb_index < idx_start || req->wb_index > idx_end)
continue;
if (!NFS_WBACK_BUSY(req))
continue;
req->wb_count++;
spin_unlock(&nfs_wreq_lock);
error = nfs_wait_on_request(req);
@@ -470,7 +454,7 @@ nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_s
if (error < 0)
return error;
spin_lock(&nfs_wreq_lock);
p = head->next;
next = idx_start;
res++;
}
spin_unlock(&nfs_wreq_lock);
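
The loop above replaces the old walk over the entire writeback list with a bounded scan of the page-index range. Below is a minimal sketch of the radix_tree_gang_lookup() idiom it relies on, reusing the hypothetical demo_* types from the earlier sketch and leaving out the real code's locking, file filtering and wait logic: fetch one entry at a time starting at next, stop once the index passes the end of the range, and advance next past the entry just returned so the scan can resume after the lock is dropped and retaken.

```c
/*
 * Illustrative sketch only -- builds on the demo_* types above.
 * radix_tree_gang_lookup() returns up to max_items entries whose index
 * is >= first_index, in ascending index order; asking for one at a time
 * gives a restartable range scan.
 */
static unsigned int demo_scan_range(struct demo_inode *di,
				    unsigned long idx_start,
				    unsigned long idx_end)
{
	struct demo_req *req;
	unsigned long next = idx_start;
	unsigned int found = 0;

	while (radix_tree_gang_lookup(&di->page_tree, (void **)&req, next, 1)) {
		if (req->wb_index > idx_end)
			break;			/* past the requested range */
		next = req->wb_index + 1;	/* resume after this entry */
		found++;
	}
	return found;
}
```
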
@@ -664,8 +648,14 @@ nfs_update_request(struct file* file, struct inode *inode, struct page *page,
}
if (new) {
int error;
nfs_lock_request_dontget(new);
nfs_inode_add_request(inode, new);
error = nfs_inode_add_request(inode, new);
if (error) {
spin_unlock(&nfs_wreq_lock);
nfs_unlock_request(new);
return ERR_PTR(error);
}
spin_unlock(&nfs_wreq_lock);
nfs_mark_request_dirty(new);
return new;
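
Since nfs_update_request() returns a pointer, the new failure path above hands the insertion error back encoded in the pointer itself. A minimal sketch of that ERR_PTR()/IS_ERR()/PTR_ERR() idiom follows, again using the hypothetical demo_* helpers from the first sketch rather than the actual NFS routines.

```c
/*
 * Illustrative sketch only -- demo_* names are hypothetical.
 */
#include <linux/err.h>
#include <linux/slab.h>

/* Allocate and insert a request, or return the errno wrapped in a pointer */
static struct demo_req *demo_get_request(struct demo_inode *di,
					 unsigned long index)
{
	struct demo_req *req = kmalloc(sizeof(*req), GFP_ATOMIC);
	int error;

	if (!req)
		return ERR_PTR(-ENOMEM);
	req->wb_index = index;
	error = demo_add_request(di, req);
	if (error) {
		kfree(req);
		return ERR_PTR(error);	/* e.g. -ENOMEM from the insert */
	}
	return req;
}

/* Caller side: IS_ERR()/PTR_ERR() recover the negative errno */
static int demo_caller(struct demo_inode *di, unsigned long index)
{
	struct demo_req *req = demo_get_request(di, index);

	if (IS_ERR(req))
		return PTR_ERR(req);
	/* ... use req ... */
	return 0;
}
```
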
@@ -165,7 +165,7 @@ struct nfs_inode {
*/
struct list_head dirty;
struct list_head commit;
struct list_head writeback;
struct radix_tree_root nfs_page_tree;
unsigned int ndirty,
ncommit,
@@ -356,7 +356,7 @@ nfs_commit_file(struct inode *inode, struct file *file, unsigned long offset,
static inline int
nfs_have_writebacks(struct inode *inode)
{
return !list_empty(&NFS_I(inode)->writeback);
return NFS_I(inode)->npages != 0;
}
static inline int
@@ -23,8 +23,7 @@
#define PG_BUSY 0
struct nfs_page {
struct list_head wb_hash, /* Inode */
wb_lru, /* superblock lru list */
struct list_head wb_lru, /* superblock lru list */
wb_list, /* Defines state of page: */
*wb_list_head; /* read/write/commit */
struct file *wb_file;
@@ -125,12 +124,6 @@ nfs_list_entry(struct list_head *head)
return list_entry(head, struct nfs_page, wb_list);
}
static inline struct nfs_page *
nfs_inode_wb_entry(struct list_head *head)
{
return list_entry(head, struct nfs_page, wb_hash);
}
static inline void
__nfs_add_lru(struct list_head *head, struct nfs_page *req)
{