Commit e7f90084 authored by Linus Torvalds's avatar Linus Torvalds

Merge

parents e1b24127 c03e7607
...@@ -222,7 +222,7 @@ rw_raw_dev(int rw, struct file *filp, const struct iovec *iov, unsigned long nr_ ...@@ -222,7 +222,7 @@ rw_raw_dev(int rw, struct file *filp, const struct iovec *iov, unsigned long nr_
count = inode->i_size - *offp; count = inode->i_size - *offp;
nr_segs = iov_shorten((struct iovec *)iov, nr_segs, count); nr_segs = iov_shorten((struct iovec *)iov, nr_segs, count);
} }
ret = generic_file_direct_IO(rw, inode, iov, *offp, nr_segs); ret = generic_file_direct_IO(rw, filp, iov, *offp, nr_segs);
if (ret > 0) if (ret > 0)
*offp += ret; *offp += ret;
......
...@@ -514,6 +514,27 @@ CONFIG_NFS_V3 ...@@ -514,6 +514,27 @@ CONFIG_NFS_V3
If unsure, say N. If unsure, say N.
CONFIG_NFS_DIRECTIO
This option enables applications to perform uncached I/O on files
in NFS file systems using the O_DIRECT open() flag. When O_DIRECT
is set for a file, its data is not cached in the system's page
cache. Data is moved to and from user-level application buffers
directly. Unlike local disk-based file systems, NFS O_DIRECT has
no alignment restrictions.
Unless your program is designed to use O_DIRECT properly, you are
much better off allowing the NFS client to manage data caching for
you. Misusing O_DIRECT can cause poor server performance or network
storms. This kernel build option defaults OFF to avoid exposing
system administrators unwittingly to a potentially hazardous
feature.
For more details on NFS O_DIRECT, see fs/nfs/direct.c.
If unsure, say N. This reduces the size of the NFS client, and
causes open() to return EINVAL if a file residing in NFS is
opened with the O_DIRECT flag.
CONFIG_ROOT_NFS CONFIG_ROOT_NFS
If you want your Linux box to mount its whole root file system (the If you want your Linux box to mount its whole root file system (the
one containing the directory /) from some other computer over the one containing the directory /) from some other computer over the
......
...@@ -116,9 +116,11 @@ blkdev_get_blocks(struct inode *inode, sector_t iblock, ...@@ -116,9 +116,11 @@ blkdev_get_blocks(struct inode *inode, sector_t iblock,
} }
static int static int
blkdev_direct_IO(int rw, struct inode *inode, const struct iovec *iov, blkdev_direct_IO(int rw, struct file *file, const struct iovec *iov,
loff_t offset, unsigned long nr_segs) loff_t offset, unsigned long nr_segs)
{ {
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
return generic_direct_IO(rw, inode, iov, offset, return generic_direct_IO(rw, inode, iov, offset,
nr_segs, blkdev_get_blocks); nr_segs, blkdev_get_blocks);
} }
......
...@@ -647,10 +647,10 @@ generic_direct_IO(int rw, struct inode *inode, const struct iovec *iov, ...@@ -647,10 +647,10 @@ generic_direct_IO(int rw, struct inode *inode, const struct iovec *iov,
} }
ssize_t ssize_t
generic_file_direct_IO(int rw, struct inode *inode, const struct iovec *iov, generic_file_direct_IO(int rw, struct file *file, const struct iovec *iov,
loff_t offset, unsigned long nr_segs) loff_t offset, unsigned long nr_segs)
{ {
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
ssize_t retval; ssize_t retval;
if (mapping->nrpages) { if (mapping->nrpages) {
...@@ -661,9 +661,9 @@ generic_file_direct_IO(int rw, struct inode *inode, const struct iovec *iov, ...@@ -661,9 +661,9 @@ generic_file_direct_IO(int rw, struct inode *inode, const struct iovec *iov,
goto out; goto out;
} }
retval = mapping->a_ops->direct_IO(rw, inode, iov, offset, nr_segs); retval = mapping->a_ops->direct_IO(rw, file, iov, offset, nr_segs);
if (inode->i_mapping->nrpages) if (mapping->nrpages)
invalidate_inode_pages2(inode->i_mapping); invalidate_inode_pages2(mapping);
out: out:
return retval; return retval;
} }
...@@ -619,9 +619,11 @@ ext2_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks, ...@@ -619,9 +619,11 @@ ext2_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks,
} }
static int static int
ext2_direct_IO(int rw, struct inode *inode, const struct iovec *iov, ext2_direct_IO(int rw, struct file *file, const struct iovec *iov,
loff_t offset, unsigned long nr_segs) loff_t offset, unsigned long nr_segs)
{ {
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
return generic_direct_IO(rw, inode, iov, return generic_direct_IO(rw, inode, iov,
offset, nr_segs, ext2_get_blocks); offset, nr_segs, ext2_get_blocks);
} }
......
...@@ -1399,10 +1399,11 @@ static int ext3_releasepage(struct page *page, int wait) ...@@ -1399,10 +1399,11 @@ static int ext3_releasepage(struct page *page, int wait)
* If the O_DIRECT write is instantiating holes inside i_size and the machine * If the O_DIRECT write is instantiating holes inside i_size and the machine
* crashes then stale disk data _may_ be exposed inside the file. * crashes then stale disk data _may_ be exposed inside the file.
*/ */
static int ext3_direct_IO(int rw, struct inode *inode, static int ext3_direct_IO(int rw, struct file *file,
const struct iovec *iov, loff_t offset, const struct iovec *iov, loff_t offset,
unsigned long nr_segs) unsigned long nr_segs)
{ {
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
struct ext3_inode_info *ei = EXT3_I(inode); struct ext3_inode_info *ei = EXT3_I(inode);
handle_t *handle = NULL; handle_t *handle = NULL;
int ret; int ret;
......
...@@ -310,9 +310,11 @@ static int jfs_bmap(struct address_space *mapping, long block) ...@@ -310,9 +310,11 @@ static int jfs_bmap(struct address_space *mapping, long block)
return generic_block_bmap(mapping, block, jfs_get_block); return generic_block_bmap(mapping, block, jfs_get_block);
} }
static int jfs_direct_IO(int rw, struct inode *inode, const struct iovec *iov, static int jfs_direct_IO(int rw, struct file *file, const struct iovec *iov,
loff_t offset, unsigned long nr_segs) loff_t offset, unsigned long nr_segs)
{ {
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
return generic_direct_IO(rw, inode, iov, return generic_direct_IO(rw, inode, iov,
offset, nr_segs, jfs_get_blocks); offset, nr_segs, jfs_get_blocks);
} }
......
...@@ -8,6 +8,7 @@ nfs-y := dir.o file.o flushd.o inode.o nfs2xdr.o pagelist.o \ ...@@ -8,6 +8,7 @@ nfs-y := dir.o file.o flushd.o inode.o nfs2xdr.o pagelist.o \
proc.o read.o symlink.o unlink.o write.o proc.o read.o symlink.o unlink.o write.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o
nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
nfs-objs := $(nfs-y) nfs-objs := $(nfs-y)
include $(TOPDIR)/Rules.make include $(TOPDIR)/Rules.make
/*
* linux/fs/nfs/direct.c
*
* Copyright (C) 2001 by Chuck Lever <cel@netapp.com>
*
* High-performance uncached I/O for the Linux NFS client
*
* There are important applications whose performance or correctness
* depends on uncached access to file data. Database clusters
* (multiple copies of the same instance running on separate hosts)
* implement their own cache coherency protocol that subsumes file
* system cache protocols. Applications that process datasets
* considerably larger than the client's memory do not always benefit
* from a local cache. A streaming video server, for instance, has no
* need to cache the contents of a file.
*
* When an application requests uncached I/O, all read and write requests
* are made directly to the server; data stored or fetched via these
* requests is not cached in the Linux page cache. The client does not
* correct unaligned requests from applications. All requested bytes are
* held on permanent storage before a direct write system call returns to
* an application.
*
* Solaris implements an uncached I/O facility called directio() that
* is used for backups and sequential I/O to very large files. Solaris
* also supports uncaching whole NFS partitions with "-o forcedirectio,"
* an undocumented mount option.
*
* Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust.
*
* 18 Dec 2001 Initial implementation for 2.4 --cel
* 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy
* 24 Sep 2002 Rewrite to use asynchronous RPCs, port to 2.5 --cel
*
*/
#include <linux/config.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/errno.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/sunrpc/clnt.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#define NFSDBG_FACILITY (NFSDBG_PAGECACHE | NFSDBG_VFS)
#define VERF_SIZE (2 * sizeof(__u32))
/**
 * nfs_get_user_pages - pin the user pages that back an I/O buffer
 * @addr: user-space address of target buffer
 * @size: total size in bytes of target buffer
 * @pages: returned array of page struct pointers underlying target buffer
 * @rw: direction of the file I/O (READ or WRITE)
 *
 * Returns the number of pages pinned, or a negative errno.  When a
 * positive count is returned the caller owns the array stored in
 * *@pages.  On any failure, and for an empty buffer, *@pages is NULL.
 */
static inline int
nfs_get_user_pages(unsigned long addr, size_t size,
		struct page ***pages, int rw)
{
	unsigned long first_pg, last_pg, nr_pages;
	unsigned page_count;
	int result;

	*pages = NULL;
	if (size == 0)
		return 0;

	/* reject a buffer that wraps around the end of the address space */
	if (addr + size - 1 < addr)
		return -EFAULT;

	/*
	 * Count every page the buffer touches: an unaligned start or a
	 * partial tail still needs its own page.
	 */
	first_pg = addr >> PAGE_SHIFT;
	last_pg = (addr + size - 1) >> PAGE_SHIFT;
	nr_pages = last_pg - first_pg + 1;

	/* keep the count representable as int and the array size sane */
	if (nr_pages > INT_MAX / sizeof(struct page *))
		return -ENOMEM;
	page_count = (unsigned) nr_pages;

	*pages = (struct page **) kmalloc(page_count * sizeof(struct page *),
						GFP_KERNEL);
	if (!*pages)
		return -ENOMEM;

	down_read(&current->mm->mmap_sem);
	/*
	 * A READ from the file stores into the user buffer, so that is
	 * the case in which the pages must be mapped writable.
	 */
	result = get_user_pages(current, current->mm, addr,
				page_count, (rw == READ), 0,
				*pages, NULL);
	up_read(&current->mm->mmap_sem);

	if (result < 0) {
		printk(KERN_ERR "%s: get_user_pages result %d\n",
				__FUNCTION__, result);
		kfree(*pages);
		*pages = NULL;
		return result;
	}

	if ((unsigned) result < page_count) {
		/* only part of the buffer could be pinned: undo and fail */
		while (result > 0)
			page_cache_release((*pages)[--result]);
		kfree(*pages);
		*pages = NULL;
		return -EFAULT;
	}

	return result;
}
/**
 * nfs_free_user_pages - tear down page struct array
 * @pages: array of page struct pointers underlying target buffer
 * @count: number of leading entries in @pages to drop a reference on
 *
 * Releases one reference on each of the first @count pages, then frees
 * the array itself.  A @count of zero frees only the array.
 */
static inline void
nfs_free_user_pages(struct page **pages, unsigned count)
{
	unsigned i;

	for (i = 0; i < count; i++)
		page_cache_release(pages[i]);

	kfree(pages);
}
/**
 * nfs_iov2pagelist - convert an array of iovecs to a list of page requests
 * @rw: direction of the I/O (READ or WRITE)
 * @inode: inode of target file
 * @cred: credentials of user who requested I/O
 * @iov: array of vectors that define I/O buffer
 * @offset: where in file to begin the I/O
 * @nr_segs: size of iovec array
 * @requests: append new page requests to this list head
 *
 * Returns the total number of bytes queued, or a negative errno.  On
 * error the requests queued so far are disposed of with
 * nfs_release_list().
 *
 * NOTE(review): a request carries a single in-page offset, so this
 * presumably only lines up when the file offset and the user buffer
 * share the same alignment within a page -- confirm before enabling.
 */
static int
nfs_iov2pagelist(int rw, const struct inode *inode,
		const struct rpc_cred *cred,
		const struct iovec *iov, loff_t offset,
		unsigned long nr_segs, struct list_head *requests)
{
	unsigned seg;
	int tot_bytes = 0;
	int page_count = 0;
	int error;
	struct page **pages = NULL;

	/* for each iovec in the array... */
	for (seg = 0; seg < nr_segs; seg++) {
		const unsigned long user_addr =
					(unsigned long) iov[seg].iov_base;
		size_t bytes = iov[seg].iov_len;
		unsigned int pg_offset = (user_addr & ~PAGE_MASK);
		int page = 0;

		page_count = nfs_get_user_pages(user_addr, bytes, &pages, rw);
		if (page_count < 0) {
			/*
			 * The array may have been allocated even though
			 * pinning failed; kfree(NULL) is harmless.
			 */
			kfree(pages);
			nfs_release_list(requests);
			return page_count;
		}

		/* ...build as many page requests as required */
		while (bytes > 0) {
			struct nfs_page *new;
			/* a request must never run past the end of its page */
			unsigned int pg_bytes = PAGE_SIZE - pg_offset;

			if (pg_bytes > bytes)
				pg_bytes = bytes;

			/* never index past the pages that were pinned */
			if (page >= page_count) {
				error = -EFAULT;
				goto out_unwind;
			}

			new = nfs_create_request((struct rpc_cred *) cred,
						 (struct inode *) inode,
						 pages[page],
						 pg_offset, pg_bytes);
			if (IS_ERR(new)) {
				error = PTR_ERR(new);
				goto out_unwind;
			}

			/*
			 * wb_index is a page index everywhere else (sorting,
			 * coalescing, req_offset()), not a byte offset.
			 */
			new->wb_index = offset >> PAGE_CACHE_SHIFT;
			nfs_list_add_request(new, requests);

			/* after the first page */
			pg_offset = 0;
			/* advance the file position by what was queued */
			offset += pg_bytes;
			tot_bytes += pg_bytes;
			bytes -= pg_bytes;
			page++;
		}

		/* don't release pages here -- I/O completion will do that */
		nfs_free_user_pages(pages, 0);
	}

	return tot_bytes;

out_unwind:
	/* same teardown the original used when a request cannot be built */
	nfs_free_user_pages(pages, page_count);
	nfs_release_list(requests);
	return error;
}
/**
 * do_nfs_direct_IO - Read or write data without caching
 * @rw: direction of the I/O (READ or WRITE)
 * @inode: inode of target file
 * @cred: credentials of user who requested I/O
 * @iov: array of vectors that define I/O buffer
 * @offset: where in file to begin the I/O
 * @nr_segs: size of iovec array
 *
 * Break the passed-in iovec into a series of page-sized or smaller
 * requests, where each page is mapped for direct user-land I/O.
 *
 * For each of these pages, create an NFS page request and
 * append it to an automatic list of page requests.
 *
 * When all page requests have been queued, start the I/O on the
 * whole list.  The underlying routines coalesce the pages on the
 * list into a bunch of asynchronous "r/wsize" network requests.
 *
 * I/O completion automatically unmaps and releases the pages.
 *
 * Returns the number of bytes queued by nfs_iov2pagelist() on success,
 * or a negative errno.
 */
static int
do_nfs_direct_IO(int rw, const struct inode *inode,
		const struct rpc_cred *cred, const struct iovec *iov,
		loff_t offset, unsigned long nr_segs)
{
	LIST_HEAD(requests);
	int result, tot_bytes;

	result = nfs_iov2pagelist(rw, inode, cred, iov, offset, nr_segs,
					&requests);
	if (result < 0)
		return result;
	tot_bytes = result;

	switch (rw) {
	case READ:
		if (IS_SYNC(inode) || (NFS_SERVER(inode)->rsize < PAGE_SIZE)) {
			/*
			 * The synchronous path works from the iovec, so the
			 * request list built above would otherwise be left
			 * dangling off this stack frame.
			 */
			nfs_release_list(&requests);
			result = nfs_direct_read_sync(inode, cred, iov, offset, nr_segs);
			break;
		}
		result = nfs_pagein_list(&requests, NFS_SERVER(inode)->rpages);
		{
			/* wait regardless, but do not lose a wait failure */
			int wait_status = nfs_wait_for_reads(&requests);

			if (result >= 0 && wait_status < 0)
				result = wait_status;
		}
		break;
	case WRITE:
		if (IS_SYNC(inode) || (NFS_SERVER(inode)->wsize < PAGE_SIZE)) {
			/* list is unused on the synchronous path; see READ */
			nfs_release_list(&requests);
			result = nfs_direct_write_sync(inode, cred, iov, offset, nr_segs);
		} else
			result = nfs_flush_list(&requests,
					NFS_SERVER(inode)->wpages, FLUSH_WAIT);

		/* invalidate cache so non-direct readers pick up changes */
		invalidate_inode_pages((struct inode *) inode);
		break;
	default:
		/* nothing will consume the list for an unknown direction */
		nfs_release_list(&requests);
		result = -EINVAL;
		break;
	}

	if (result < 0)
		return result;
	return tot_bytes;
}
/**
 * nfs_direct_IO - NFS address space operation for direct I/O
 * @rw: direction (read or write)
 * @file: file struct of target file
 * @iov: array of vectors that define I/O buffer
 * @offset: offset in file to begin the operation
 * @nr_segs: size of iovec array
 *
 * The inode's i_sem is no longer held by the VFS layer before it calls
 * this function to do a write.
 *
 * Returns -EINVAL while the implementation below remains disabled;
 * once enabled it returns the result of do_nfs_direct_IO().
 */
int
nfs_direct_IO(int rw, struct file *file, const struct iovec *iov,
		loff_t offset, unsigned long nr_segs)
{
	/*
	 * None of this works yet, so keep it out of the build.  The whole
	 * body is disabled, not just the declarations, so that enabling
	 * CONFIG_NFS_DIRECTIO yields a kernel that builds and simply
	 * refuses direct I/O.
	 */
#if 0
	int result;
	struct dentry *dentry = file->f_dentry;
	const struct inode *inode = dentry->d_inode->i_mapping->host;
	const struct rpc_cred *cred = nfs_file_cred(file);

	dfprintk(VFS, "NFS: direct_IO(%s) (%s/%s) off/no(%Lu/%lu)\n",
				((rw == READ) ? "READ" : "WRITE"),
				dentry->d_parent->d_name.name,
				dentry->d_name.name, offset, nr_segs);

	result = do_nfs_direct_IO(rw, inode, cred, iov, offset, nr_segs);

	dfprintk(VFS, "NFS: direct_IO result = %d\n", result);

	return result;
#else
	return -EINVAL;
#endif
}
...@@ -176,7 +176,7 @@ static int nfs_sync_page(struct page *page) ...@@ -176,7 +176,7 @@ static int nfs_sync_page(struct page *page)
{ {
struct address_space *mapping; struct address_space *mapping;
struct inode *inode; struct inode *inode;
unsigned long index = page_index(page); unsigned long index = page->index;
unsigned int rpages; unsigned int rpages;
int result; int result;
...@@ -199,7 +199,10 @@ struct address_space_operations nfs_file_aops = { ...@@ -199,7 +199,10 @@ struct address_space_operations nfs_file_aops = {
.sync_page = nfs_sync_page, .sync_page = nfs_sync_page,
.writepage = nfs_writepage, .writepage = nfs_writepage,
.prepare_write = nfs_prepare_write, .prepare_write = nfs_prepare_write,
.commit_write = nfs_commit_write .commit_write = nfs_commit_write,
#ifdef CONFIG_NFS_DIRECTIO
.direct_IO = nfs_direct_IO,
#endif
}; };
/* /*
......
...@@ -103,6 +103,7 @@ nfs_create_request(struct rpc_cred *cred, struct inode *inode, ...@@ -103,6 +103,7 @@ nfs_create_request(struct rpc_cred *cred, struct inode *inode,
* long write-back delay. This will be adjusted in * long write-back delay. This will be adjusted in
* update_nfs_request below if the region is not locked. */ * update_nfs_request below if the region is not locked. */
req->wb_page = page; req->wb_page = page;
req->wb_index = page->index;
page_cache_get(page); page_cache_get(page);
req->wb_offset = offset; req->wb_offset = offset;
req->wb_bytes = count; req->wb_bytes = count;
...@@ -174,6 +175,26 @@ nfs_release_request(struct nfs_page *req) ...@@ -174,6 +175,26 @@ nfs_release_request(struct nfs_page *req)
nfs_page_free(req); nfs_page_free(req);
} }
/**
* nfs_release_list - cleanly dispose of an unattached list of page requests
* @list: list of doomed page requests
*/
void
nfs_release_list(struct list_head *list)
{
while (!list_empty(list)) {
struct nfs_page *req = nfs_list_entry(list);
nfs_list_remove_request(req);
page_cache_release(req->wb_page);
/* Release struct file or cached credential */
nfs_clear_request(req);
nfs_page_free(req);
}
}
/** /**
* nfs_list_add_request - Insert a request into a sorted list * nfs_list_add_request - Insert a request into a sorted list
* @req: request * @req: request
...@@ -188,7 +209,6 @@ void ...@@ -188,7 +209,6 @@ void
nfs_list_add_request(struct nfs_page *req, struct list_head *head) nfs_list_add_request(struct nfs_page *req, struct list_head *head)
{ {
struct list_head *pos; struct list_head *pos;
unsigned long pg_idx = page_index(req->wb_page);
#ifdef NFS_PARANOIA #ifdef NFS_PARANOIA
if (!list_empty(&req->wb_list)) { if (!list_empty(&req->wb_list)) {
...@@ -198,7 +218,7 @@ nfs_list_add_request(struct nfs_page *req, struct list_head *head) ...@@ -198,7 +218,7 @@ nfs_list_add_request(struct nfs_page *req, struct list_head *head)
#endif #endif
list_for_each_prev(pos, head) { list_for_each_prev(pos, head) {
struct nfs_page *p = nfs_list_entry(pos); struct nfs_page *p = nfs_list_entry(pos);
if (page_index(p->wb_page) < pg_idx) if (p->wb_index < req->wb_index)
break; break;
} }
list_add(&req->wb_list, pos); list_add(&req->wb_list, pos);
...@@ -223,6 +243,37 @@ nfs_wait_on_request(struct nfs_page *req) ...@@ -223,6 +243,37 @@ nfs_wait_on_request(struct nfs_page *req)
return nfs_wait_event(clnt, req->wb_wait, !NFS_WBACK_BUSY(req)); return nfs_wait_event(clnt, req->wb_wait, !NFS_WBACK_BUSY(req));
} }
/**
 * nfs_wait_for_reads - wait for outstanding requests to complete
 * @head: list of page requests to wait for
 *
 * Walks @head, waiting on each request that is still busy and then
 * removing, clearing and freeing it.  Requests that are not busy are
 * skipped and left on the list.
 *
 * Returns the number of requests waited for, or the negative error
 * from nfs_wait_on_request().
 */
int
nfs_wait_for_reads(struct list_head *head)
{
	struct list_head *p = head->next;
	unsigned int res = 0;

	while (p != head) {
		struct nfs_page *req = nfs_list_entry(p);
		int error;

		if (!NFS_WBACK_BUSY(req)) {
			/*
			 * Step past this entry; a bare "continue" would
			 * re-test the same request forever.
			 */
			p = p->next;
			continue;
		}

		req->wb_count++;
		error = nfs_wait_on_request(req);
		if (error < 0)
			return error;

		nfs_list_remove_request(req);
		nfs_clear_request(req);
		nfs_page_free(req);

		/* start over from the front after having slept */
		p = head->next;
		res++;
	}

	return res;
}
/** /**
* nfs_coalesce_requests - Split coalesced requests out from a list. * nfs_coalesce_requests - Split coalesced requests out from a list.
* @head: source list * @head: source list
...@@ -247,7 +298,7 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst, ...@@ -247,7 +298,7 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
if (prev) { if (prev) {
if (req->wb_cred != prev->wb_cred) if (req->wb_cred != prev->wb_cred)
break; break;
if (page_index(req->wb_page) != page_index(prev->wb_page)+1) if (req->wb_index != (prev->wb_index + 1))
break; break;
if (req->wb_offset != 0) if (req->wb_offset != 0)
...@@ -280,7 +331,7 @@ nfs_scan_forward(struct nfs_page *req, struct list_head *dst, int nmax) ...@@ -280,7 +331,7 @@ nfs_scan_forward(struct nfs_page *req, struct list_head *dst, int nmax)
struct nfs_server *server = NFS_SERVER(req->wb_inode); struct nfs_server *server = NFS_SERVER(req->wb_inode);
struct list_head *pos, *head = req->wb_list_head; struct list_head *pos, *head = req->wb_list_head;
struct rpc_cred *cred = req->wb_cred; struct rpc_cred *cred = req->wb_cred;
unsigned long idx = page_index(req->wb_page) + 1; unsigned long idx = req->wb_index + 1;
int npages = 0; int npages = 0;
for (pos = req->wb_list.next; nfs_lock_request(req); pos = pos->next) { for (pos = req->wb_list.next; nfs_lock_request(req); pos = pos->next) {
...@@ -296,7 +347,7 @@ nfs_scan_forward(struct nfs_page *req, struct list_head *dst, int nmax) ...@@ -296,7 +347,7 @@ nfs_scan_forward(struct nfs_page *req, struct list_head *dst, int nmax)
if (req->wb_offset + req->wb_bytes != PAGE_CACHE_SIZE) if (req->wb_offset + req->wb_bytes != PAGE_CACHE_SIZE)
break; break;
req = nfs_list_entry(pos); req = nfs_list_entry(pos);
if (page_index(req->wb_page) != idx++) if (req->wb_index != idx++)
break; break;
if (req->wb_offset != 0) if (req->wb_offset != 0)
break; break;
...@@ -393,17 +444,15 @@ nfs_scan_list(struct list_head *head, struct list_head *dst, ...@@ -393,17 +444,15 @@ nfs_scan_list(struct list_head *head, struct list_head *dst,
idx_end = idx_start + npages - 1; idx_end = idx_start + npages - 1;
list_for_each_safe(pos, tmp, head) { list_for_each_safe(pos, tmp, head) {
unsigned long pg_idx;
req = nfs_list_entry(pos); req = nfs_list_entry(pos);
if (file && req->wb_file != file) if (file && req->wb_file != file)
continue; continue;
pg_idx = page_index(req->wb_page); if (req->wb_index < idx_start)
if (pg_idx < idx_start)
continue; continue;
if (pg_idx > idx_end) if (req->wb_index > idx_end)
break; break;
if (!nfs_lock_request(req)) if (!nfs_lock_request(req))
......
...@@ -179,7 +179,7 @@ nfs_readpage_async(struct file *file, struct inode *inode, struct page *page) ...@@ -179,7 +179,7 @@ nfs_readpage_async(struct file *file, struct inode *inode, struct page *page)
nfs_mark_request_read(new); nfs_mark_request_read(new);
if (nfsi->nread >= NFS_SERVER(inode)->rpages || if (nfsi->nread >= NFS_SERVER(inode)->rpages ||
page_index(page) == (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) page->index == (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)
nfs_pagein_inode(inode, 0, 0); nfs_pagein_inode(inode, 0, 0);
return 0; return 0;
} }
...@@ -207,7 +207,7 @@ nfs_read_rpcsetup(struct list_head *head, struct nfs_read_data *data) ...@@ -207,7 +207,7 @@ nfs_read_rpcsetup(struct list_head *head, struct nfs_read_data *data)
data->inode = req->wb_inode; data->inode = req->wb_inode;
data->cred = req->wb_cred; data->cred = req->wb_cred;
data->args.fh = NFS_FH(req->wb_inode); data->args.fh = NFS_FH(req->wb_inode);
data->args.offset = page_offset(req->wb_page) + req->wb_offset; data->args.offset = req_offset(req) + req->wb_offset;
data->args.pgbase = req->wb_offset; data->args.pgbase = req->wb_offset;
data->args.count = count; data->args.count = count;
data->res.fattr = &data->fattr; data->res.fattr = &data->fattr;
...@@ -441,7 +441,7 @@ nfs_readpage_result(struct rpc_task *task) ...@@ -441,7 +441,7 @@ nfs_readpage_result(struct rpc_task *task)
req->wb_inode->i_sb->s_id, req->wb_inode->i_sb->s_id,
(long long)NFS_FILEID(req->wb_inode), (long long)NFS_FILEID(req->wb_inode),
req->wb_bytes, req->wb_bytes,
(long long)(page_offset(page) + req->wb_offset)); (long long)(req_offset(req) + req->wb_offset));
nfs_clear_request(req); nfs_clear_request(req);
nfs_release_request(req); nfs_release_request(req);
nfs_unlock_request(req); nfs_unlock_request(req);
......
...@@ -291,7 +291,7 @@ region_locked(struct inode *inode, struct nfs_page *req) ...@@ -291,7 +291,7 @@ region_locked(struct inode *inode, struct nfs_page *req)
if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
return 0; return 0;
rqstart = page_offset(req->wb_page) + req->wb_offset; rqstart = req_offset(req) + req->wb_offset;
rqend = rqstart + req->wb_bytes; rqend = rqstart + req->wb_bytes;
for (fl = inode->i_flock; fl; fl = fl->fl_next) { for (fl = inode->i_flock; fl; fl = fl->fl_next) {
if (fl->fl_owner == current->files && (fl->fl_flags & FL_POSIX) if (fl->fl_owner == current->files && (fl->fl_flags & FL_POSIX)
...@@ -357,7 +357,7 @@ nfs_inode_remove_request(struct nfs_page *req) ...@@ -357,7 +357,7 @@ nfs_inode_remove_request(struct nfs_page *req)
* Find a request * Find a request
*/ */
static inline struct nfs_page * static inline struct nfs_page *
_nfs_find_request(struct inode *inode, struct page *page) _nfs_find_request(struct inode *inode, unsigned long index)
{ {
struct nfs_inode *nfsi = NFS_I(inode); struct nfs_inode *nfsi = NFS_I(inode);
struct list_head *head, *next; struct list_head *head, *next;
...@@ -367,7 +367,7 @@ _nfs_find_request(struct inode *inode, struct page *page) ...@@ -367,7 +367,7 @@ _nfs_find_request(struct inode *inode, struct page *page)
while (next != head) { while (next != head) {
struct nfs_page *req = nfs_inode_wb_entry(next); struct nfs_page *req = nfs_inode_wb_entry(next);
next = next->next; next = next->next;
if (page_index(req->wb_page) != page_index(page)) if (req->wb_index != index)
continue; continue;
req->wb_count++; req->wb_count++;
return req; return req;
...@@ -376,12 +376,12 @@ _nfs_find_request(struct inode *inode, struct page *page) ...@@ -376,12 +376,12 @@ _nfs_find_request(struct inode *inode, struct page *page)
} }
static struct nfs_page * static struct nfs_page *
nfs_find_request(struct inode *inode, struct page *page) nfs_find_request(struct inode *inode, unsigned long index)
{ {
struct nfs_page *req; struct nfs_page *req;
spin_lock(&nfs_wreq_lock); spin_lock(&nfs_wreq_lock);
req = _nfs_find_request(inode, page); req = _nfs_find_request(inode, index);
spin_unlock(&nfs_wreq_lock); spin_unlock(&nfs_wreq_lock);
return req; return req;
} }
...@@ -457,7 +457,6 @@ nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_s ...@@ -457,7 +457,6 @@ nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_s
head = &nfsi->writeback; head = &nfsi->writeback;
p = head->next; p = head->next;
while (p != head) { while (p != head) {
unsigned long pg_idx;
struct nfs_page *req = nfs_inode_wb_entry(p); struct nfs_page *req = nfs_inode_wb_entry(p);
p = p->next; p = p->next;
...@@ -465,8 +464,7 @@ nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_s ...@@ -465,8 +464,7 @@ nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_s
if (file && req->wb_file != file) if (file && req->wb_file != file)
continue; continue;
pg_idx = page_index(req->wb_page); if (req->wb_index < idx_start || req->wb_index > idx_end)
if (pg_idx < idx_start || pg_idx > idx_end)
continue; continue;
if (!NFS_WBACK_BUSY(req)) if (!NFS_WBACK_BUSY(req))
...@@ -654,7 +652,7 @@ nfs_update_request(struct file* file, struct inode *inode, struct page *page, ...@@ -654,7 +652,7 @@ nfs_update_request(struct file* file, struct inode *inode, struct page *page,
* A request for the page we wish to update * A request for the page we wish to update
*/ */
spin_lock(&nfs_wreq_lock); spin_lock(&nfs_wreq_lock);
req = _nfs_find_request(inode, page); req = _nfs_find_request(inode, page->index);
if (req) { if (req) {
if (!nfs_lock_request_dontget(req)) { if (!nfs_lock_request_dontget(req)) {
int error; int error;
...@@ -776,7 +774,7 @@ nfs_flush_incompatible(struct file *file, struct page *page) ...@@ -776,7 +774,7 @@ nfs_flush_incompatible(struct file *file, struct page *page)
* Also do the same if we find a request from an existing * Also do the same if we find a request from an existing
* dropped page. * dropped page.
*/ */
req = nfs_find_request(inode,page); req = nfs_find_request(inode, page->index);
if (req) { if (req) {
if (req->wb_file != file || req->wb_cred != cred || req->wb_page != page) if (req->wb_file != file || req->wb_cred != cred || req->wb_page != page)
status = nfs_wb_page(inode, page); status = nfs_wb_page(inode, page);
...@@ -884,7 +882,7 @@ nfs_write_rpcsetup(struct list_head *head, struct nfs_write_data *data) ...@@ -884,7 +882,7 @@ nfs_write_rpcsetup(struct list_head *head, struct nfs_write_data *data)
data->inode = req->wb_inode; data->inode = req->wb_inode;
data->cred = req->wb_cred; data->cred = req->wb_cred;
data->args.fh = NFS_FH(req->wb_inode); data->args.fh = NFS_FH(req->wb_inode);
data->args.offset = page_offset(req->wb_page) + req->wb_offset; data->args.offset = req_offset(req) + req->wb_offset;
data->args.pgbase = req->wb_offset; data->args.pgbase = req->wb_offset;
data->args.count = count; data->args.count = count;
data->res.fattr = &data->fattr; data->res.fattr = &data->fattr;
...@@ -1072,7 +1070,7 @@ nfs_writeback_done(struct rpc_task *task) ...@@ -1072,7 +1070,7 @@ nfs_writeback_done(struct rpc_task *task)
req->wb_inode->i_sb->s_id, req->wb_inode->i_sb->s_id,
(long long)NFS_FILEID(req->wb_inode), (long long)NFS_FILEID(req->wb_inode),
req->wb_bytes, req->wb_bytes,
(long long)(page_offset(page) + req->wb_offset)); (long long)(req_offset(req) + req->wb_offset));
if (task->tk_status < 0) { if (task->tk_status < 0) {
ClearPageUptodate(page); ClearPageUptodate(page);
...@@ -1126,8 +1124,8 @@ nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data) ...@@ -1126,8 +1124,8 @@ nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data)
* Determine the offset range of requests in the COMMIT call. * Determine the offset range of requests in the COMMIT call.
* We rely on the fact that data->pages is an ordered list... * We rely on the fact that data->pages is an ordered list...
*/ */
start = page_offset(first->wb_page) + first->wb_offset; start = req_offset(first) + first->wb_offset;
end = page_offset(last->wb_page) + (last->wb_offset + last->wb_bytes); end = req_offset(last) + (last->wb_offset + last->wb_bytes);
len = end - start; len = end - start;
/* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */ /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
if (end >= inode->i_size || len < 0 || len > (~((u32)0) >> 1)) if (end >= inode->i_size || len < 0 || len > (~((u32)0) >> 1))
...@@ -1224,7 +1222,7 @@ nfs_commit_done(struct rpc_task *task) ...@@ -1224,7 +1222,7 @@ nfs_commit_done(struct rpc_task *task)
req->wb_inode->i_sb->s_id, req->wb_inode->i_sb->s_id,
(long long)NFS_FILEID(req->wb_inode), (long long)NFS_FILEID(req->wb_inode),
req->wb_bytes, req->wb_bytes,
(long long)(page_offset(req->wb_page) + req->wb_offset)); (long long)(req_offset(req) + req->wb_offset));
if (task->tk_status < 0) { if (task->tk_status < 0) {
if (req->wb_file) if (req->wb_file)
req->wb_file->f_error = task->tk_status; req->wb_file->f_error = task->tk_status;
......
...@@ -681,11 +681,13 @@ linvfs_get_blocks_direct( ...@@ -681,11 +681,13 @@ linvfs_get_blocks_direct(
STATIC int STATIC int
linvfs_direct_IO( linvfs_direct_IO(
int rw, int rw,
struct inode *inode, struct file *file,
const struct iovec *iov, const struct iovec *iov,
loff_t offset, loff_t offset,
unsigned long nr_segs) unsigned long nr_segs)
{ {
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
return generic_direct_IO(rw, inode, iov, offset, nr_segs, return generic_direct_IO(rw, inode, iov, offset, nr_segs,
linvfs_get_blocks_direct); linvfs_get_blocks_direct);
} }
......
...@@ -308,7 +308,8 @@ struct address_space_operations { ...@@ -308,7 +308,8 @@ struct address_space_operations {
int (*bmap)(struct address_space *, long); int (*bmap)(struct address_space *, long);
int (*invalidatepage) (struct page *, unsigned long); int (*invalidatepage) (struct page *, unsigned long);
int (*releasepage) (struct page *, int); int (*releasepage) (struct page *, int);
int (*direct_IO)(int, struct inode *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); int (*direct_IO)(int, struct file *, const struct iovec *iov,
loff_t offset, unsigned long nr_segs);
}; };
struct backing_dev_info; struct backing_dev_info;
...@@ -1242,7 +1243,7 @@ ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov, ...@@ -1242,7 +1243,7 @@ ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos); unsigned long nr_segs, loff_t *ppos);
extern ssize_t generic_file_sendfile(struct file *, struct file *, loff_t *, size_t); extern ssize_t generic_file_sendfile(struct file *, struct file *, loff_t *, size_t);
extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t); extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t);
extern ssize_t generic_file_direct_IO(int rw, struct inode *inode, extern ssize_t generic_file_direct_IO(int rw, struct file *file,
const struct iovec *iov, loff_t offset, unsigned long nr_segs); const struct iovec *iov, loff_t offset, unsigned long nr_segs);
extern int generic_direct_IO(int rw, struct inode *inode, const struct iovec extern int generic_direct_IO(int rw, struct inode *inode, const struct iovec
*iov, loff_t offset, unsigned long nr_segs, get_blocks_t *get_blocks); *iov, loff_t offset, unsigned long nr_segs, get_blocks_t *get_blocks);
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/uio.h>
#include <linux/nfs_fs_sb.h> #include <linux/nfs_fs_sb.h>
...@@ -24,6 +25,7 @@ ...@@ -24,6 +25,7 @@
#include <linux/nfs.h> #include <linux/nfs.h>
#include <linux/nfs2.h> #include <linux/nfs2.h>
#include <linux/nfs3.h> #include <linux/nfs3.h>
#include <linux/nfs_page.h>
#include <linux/nfs_xdr.h> #include <linux/nfs_xdr.h>
/* /*
...@@ -242,9 +244,9 @@ loff_t page_offset(struct page *page) ...@@ -242,9 +244,9 @@ loff_t page_offset(struct page *page)
} }
static inline static inline
unsigned long page_index(struct page *page) loff_t req_offset(struct nfs_page *req)
{ {
return page->index; return ((loff_t)req->wb_index) << PAGE_CACHE_SHIFT;
} }
/* /*
...@@ -283,6 +285,12 @@ nfs_file_cred(struct file *file) ...@@ -283,6 +285,12 @@ nfs_file_cred(struct file *file)
return cred; return cred;
} }
/*
* linux/fs/nfs/direct.c
*/
extern int nfs_direct_IO(int, struct file *, const struct iovec *, loff_t,
unsigned long);
/* /*
* linux/fs/nfs/dir.c * linux/fs/nfs/dir.c
*/ */
...@@ -353,7 +361,8 @@ nfs_wb_all(struct inode *inode) ...@@ -353,7 +361,8 @@ nfs_wb_all(struct inode *inode)
static inline int static inline int
nfs_wb_page(struct inode *inode, struct page* page) nfs_wb_page(struct inode *inode, struct page* page)
{ {
int error = nfs_sync_file(inode, 0, page_index(page), 1, FLUSH_WAIT | FLUSH_STABLE); int error = nfs_sync_file(inode, 0, page->index, 1,
FLUSH_WAIT | FLUSH_STABLE);
return (error < 0) ? error : 0; return (error < 0) ? error : 0;
} }
......
...@@ -33,7 +33,8 @@ struct nfs_page { ...@@ -33,7 +33,8 @@ struct nfs_page {
struct page *wb_page; /* page to read in/write out */ struct page *wb_page; /* page to read in/write out */
wait_queue_head_t wb_wait; /* wait queue */ wait_queue_head_t wb_wait; /* wait queue */
unsigned long wb_timeout; /* when to read/write/commit */ unsigned long wb_timeout; /* when to read/write/commit */
unsigned int wb_offset, /* Offset of read/write */ unsigned long wb_index; /* Offset within mapping */
unsigned int wb_offset, /* Offset within page */
wb_bytes, /* Length of request */ wb_bytes, /* Length of request */
wb_count; /* reference count */ wb_count; /* reference count */
unsigned long wb_flags; unsigned long wb_flags;
...@@ -47,6 +48,7 @@ extern struct nfs_page *nfs_create_request(struct rpc_cred *, struct inode *, ...@@ -47,6 +48,7 @@ extern struct nfs_page *nfs_create_request(struct rpc_cred *, struct inode *,
unsigned int, unsigned int); unsigned int, unsigned int);
extern void nfs_clear_request(struct nfs_page *req); extern void nfs_clear_request(struct nfs_page *req);
extern void nfs_release_request(struct nfs_page *req); extern void nfs_release_request(struct nfs_page *req);
extern void nfs_release_list(struct list_head *list);
extern void nfs_list_add_request(struct nfs_page *, struct list_head *); extern void nfs_list_add_request(struct nfs_page *, struct list_head *);
...@@ -58,6 +60,7 @@ extern int nfs_scan_list(struct list_head *, struct list_head *, ...@@ -58,6 +60,7 @@ extern int nfs_scan_list(struct list_head *, struct list_head *,
extern int nfs_coalesce_requests(struct list_head *, struct list_head *, extern int nfs_coalesce_requests(struct list_head *, struct list_head *,
unsigned int); unsigned int);
extern int nfs_wait_on_request(struct nfs_page *); extern int nfs_wait_on_request(struct nfs_page *);
extern int nfs_wait_for_reads(struct list_head *);
extern spinlock_t nfs_wreq_lock; extern spinlock_t nfs_wreq_lock;
......
#ifndef _LINUX_NFS_XDR_H #ifndef _LINUX_NFS_XDR_H
#define _LINUX_NFS_XDR_H #define _LINUX_NFS_XDR_H
#include <linux/sunrpc/xprt.h>
struct nfs_fattr { struct nfs_fattr {
unsigned short valid; /* which fields are valid */ unsigned short valid; /* which fields are valid */
__u64 pre_size; /* pre_op_attr.size */ __u64 pre_size; /* pre_op_attr.size */
...@@ -57,10 +59,14 @@ struct nfs_fsinfo { ...@@ -57,10 +59,14 @@ struct nfs_fsinfo {
__u32 namelen;/* max name length */ __u32 namelen;/* max name length */
}; };
/* Arguments to the read call. /*
* Note that NFS_READ_MAXIOV must be <= (MAX_IOVEC-2) from sunrpc/xprt.h * Arguments to the read call.
*/ */
#define NFS_READ_MAXIOV 8
#define NFS_READ_MAXIOV (9U)
#if (NFS_READ_MAXIOV > (MAX_IOVEC -2))
#error "NFS_READ_MAXIOV is too large"
#endif
struct nfs_readargs { struct nfs_readargs {
struct nfs_fh * fh; struct nfs_fh * fh;
...@@ -76,10 +82,14 @@ struct nfs_readres { ...@@ -76,10 +82,14 @@ struct nfs_readres {
int eof; int eof;
}; };
/* Arguments to the write call. /*
* Note that NFS_WRITE_MAXIOV must be <= (MAX_IOVEC-2) from sunrpc/xprt.h * Arguments to the write call.
*/ */
#define NFS_WRITE_MAXIOV 8 #define NFS_WRITE_MAXIOV (9U)
#if (NFS_WRITE_MAXIOV > (MAX_IOVEC -2))
#error "NFS_WRITE_MAXIOV is too large"
#endif
struct nfs_writeargs { struct nfs_writeargs {
struct nfs_fh * fh; struct nfs_fh * fh;
__u64 offset; __u64 offset;
......
...@@ -193,6 +193,7 @@ EXPORT_SYMBOL(invalidate_bdev); ...@@ -193,6 +193,7 @@ EXPORT_SYMBOL(invalidate_bdev);
EXPORT_SYMBOL(invalidate_inodes); EXPORT_SYMBOL(invalidate_inodes);
EXPORT_SYMBOL(invalidate_device); EXPORT_SYMBOL(invalidate_device);
EXPORT_SYMBOL(invalidate_inode_pages); EXPORT_SYMBOL(invalidate_inode_pages);
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
EXPORT_SYMBOL(truncate_inode_pages); EXPORT_SYMBOL(truncate_inode_pages);
EXPORT_SYMBOL(fsync_bdev); EXPORT_SYMBOL(fsync_bdev);
EXPORT_SYMBOL(permission); EXPORT_SYMBOL(permission);
......
...@@ -852,7 +852,7 @@ __generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, ...@@ -852,7 +852,7 @@ __generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
nr_segs = iov_shorten((struct iovec *)iov, nr_segs = iov_shorten((struct iovec *)iov,
nr_segs, count); nr_segs, count);
} }
retval = generic_file_direct_IO(READ, inode, retval = generic_file_direct_IO(READ, filp,
iov, pos, nr_segs); iov, pos, nr_segs);
if (retval > 0) if (retval > 0)
*ppos = pos + retval; *ppos = pos + retval;
...@@ -1534,7 +1534,7 @@ generic_file_write_nolock(struct file *file, const struct iovec *iov, ...@@ -1534,7 +1534,7 @@ generic_file_write_nolock(struct file *file, const struct iovec *iov,
if (count != ocount) if (count != ocount)
nr_segs = iov_shorten((struct iovec *)iov, nr_segs = iov_shorten((struct iovec *)iov,
nr_segs, count); nr_segs, count);
written = generic_file_direct_IO(WRITE, inode, written = generic_file_direct_IO(WRITE, file,
iov, pos, nr_segs); iov, pos, nr_segs);
if (written > 0) { if (written > 0) {
loff_t end = pos + written; loff_t end = pos + written;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment