Commit ca9268fe authored by Trond Myklebust's avatar Trond Myklebust

NFSv2/v3/v4: New attribute revalidation code that no
     longer relies on ctime for correctness in avoiding
     update races.

VFS: allow filesystems to disable inode_update_time() on
     a per-inode basis.
parent 9bf35f8c
......@@ -1178,6 +1178,8 @@ void inode_update_time(struct inode *inode, int ctime_too)
struct timespec now;
int sync_it = 0;
if (IS_NOCMTIME(inode))
return;
if (IS_RDONLY(inode))
return;
......
This diff is collapsed.
......@@ -269,6 +269,7 @@ nfs_direct_write_seg(struct inode *inode, struct file *file,
if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
wdata.args.stable = NFS_FILE_SYNC;
nfs_begin_data_update(inode);
retry:
need_commit = 0;
tot_bytes = 0;
......@@ -334,6 +335,8 @@ nfs_direct_write_seg(struct inode *inode, struct file *file,
VERF_SIZE) != 0)
goto sync_retry;
}
nfs_end_data_update(inode);
NFS_FLAGS(inode) |= NFS_INO_INVALID_DATA;
return tot_bytes;
......
......@@ -104,11 +104,16 @@ nfs_file_flush(struct file *file)
dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
if ((file->f_mode & FMODE_WRITE) == 0)
return 0;
lock_kernel();
status = nfs_wb_file(inode, file);
/* Ensure that data+attribute caches are up to date after close() */
status = nfs_wb_all(inode);
if (!status) {
status = file->f_error;
file->f_error = 0;
if (!status)
__nfs_revalidate_inode(NFS_SERVER(inode), inode);
}
unlock_kernel();
return status;
......
This diff is collapsed.
......@@ -68,20 +68,6 @@ nfs3_async_handle_jukebox(struct rpc_task *task)
return 1;
}
/*
 * Refresh the inode from the attributes carried by a write/commit reply.
 *
 * Does nothing unless fattr->valid has NFS_ATTR_FATTR set.  When the
 * reply lacks NFS_ATTR_WCC, the pre_size/pre_mtime/pre_ctime fields are
 * filled in from the values currently cached on the inode and the flag
 * is set before handing the attributes to nfs_refresh_inode().
 */
static void
nfs3_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
{
	if ((fattr->valid & NFS_ATTR_FATTR) == 0)
		return;

	if ((fattr->valid & NFS_ATTR_WCC) == 0) {
		/* Substitute the cached values for the missing pre-op data. */
		fattr->pre_size = NFS_CACHE_ISIZE(inode);
		fattr->pre_mtime = NFS_CACHE_MTIME(inode);
		fattr->pre_ctime = NFS_CACHE_CTIME(inode);
		fattr->valid |= NFS_ATTR_WCC;
	}

	nfs_refresh_inode(inode, fattr);
}
static struct rpc_cred *
nfs_cred(struct inode *inode, struct file *filp)
{
......@@ -280,7 +266,7 @@ nfs3_proc_write(struct nfs_write_data *wdata, struct file *filp)
msg.rpc_cred = nfs_cred(inode, filp);
status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags);
if (status >= 0)
nfs3_write_refresh_inode(inode, fattr);
nfs_refresh_inode(inode, fattr);
dprintk("NFS reply write: %d\n", status);
return status < 0? status : wdata->res.count;
}
......@@ -303,7 +289,7 @@ nfs3_proc_commit(struct nfs_write_data *cdata, struct file *filp)
msg.rpc_cred = nfs_cred(inode, filp);
status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
if (status >= 0)
nfs3_write_refresh_inode(inode, fattr);
nfs_refresh_inode(inode, fattr);
dprintk("NFS reply commit: %d\n", status);
return status;
}
......@@ -777,12 +763,13 @@ nfs3_proc_read_setup(struct nfs_read_data *data, unsigned int count)
static void
nfs3_write_done(struct rpc_task *task)
{
struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
struct nfs_write_data *data;
if (nfs3_async_handle_jukebox(task))
return;
data = (struct nfs_write_data *)task->tk_calldata;
if (task->tk_status >= 0)
nfs3_write_refresh_inode(data->inode, data->res.fattr);
nfs_refresh_inode(data->inode, data->res.fattr);
nfs_writeback_done(task);
}
......@@ -835,12 +822,13 @@ nfs3_proc_write_setup(struct nfs_write_data *data, unsigned int count, int how)
static void
nfs3_commit_done(struct rpc_task *task)
{
struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
struct nfs_write_data *data;
if (nfs3_async_handle_jukebox(task))
return;
data = (struct nfs_write_data *)task->tk_calldata;
if (task->tk_status >= 0)
nfs3_write_refresh_inode(data->inode, data->res.fattr);
nfs_refresh_inode(data->inode, data->res.fattr);
nfs_commit_done(task);
}
......
......@@ -1088,12 +1088,8 @@ nfs4_proc_read(struct nfs_read_data *rdata, struct file *filp)
fattr->valid = 0;
status = rpc_call_sync(server->client, &msg, flags);
if (!status) {
if (!status)
renew_lease(server, timestamp);
/* Check cache consistency */
if (fattr->change_attr != NFS_CHANGE_ATTR(inode))
nfs_zap_caches(inode);
}
dprintk("NFS reply read: %d\n", status);
return status;
}
......@@ -1130,7 +1126,6 @@ nfs4_proc_write(struct nfs_write_data *wdata, struct file *filp)
fattr->valid = 0;
status = rpc_call_sync(server->client, &msg, rpcflags);
NFS_CACHEINV(inode);
dprintk("NFS reply write: %d\n", status);
return status;
}
......@@ -1517,7 +1512,6 @@ nfs4_read_done(struct rpc_task *task)
{
struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
struct inode *inode = data->inode;
struct nfs_fattr *fattr = data->res.fattr;
if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
task->tk_action = nfs4_restart_read;
......@@ -1525,11 +1519,6 @@ nfs4_read_done(struct rpc_task *task)
}
if (task->tk_status > 0)
renew_lease(NFS_SERVER(inode), data->timestamp);
/* Check cache consistency */
if (fattr->change_attr != NFS_CHANGE_ATTR(inode))
nfs_zap_caches(inode);
if (fattr->bitmap[1] & FATTR4_WORD1_TIME_ACCESS)
inode->i_atime = fattr->atime;
/* Call back common NFS readpage processing */
nfs_readpage_result(task);
}
......@@ -1576,21 +1565,6 @@ nfs4_proc_read_setup(struct nfs_read_data *data, unsigned int count)
rpc_call_setup(task, &msg, 0);
}
/*
 * Apply the attributes returned by an NFSv4 write/commit to the inode.
 *
 * If the pre-operation change attribute in the reply does not match the
 * one stored on the inode, the caches are zapped.  The stored change
 * attribute is then replaced unconditionally, and i_blocks, i_ctime and
 * i_mtime are each updated only when the corresponding bit is set in
 * word 1 of the returned attribute bitmap.
 */
static void
nfs4_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
{
	/* Check cache consistency */
	if (NFS_CHANGE_ATTR(inode) != fattr->pre_change_attr)
		nfs_zap_caches(inode);

	NFS_CHANGE_ATTR(inode) = fattr->change_attr;

	if ((fattr->bitmap[1] & FATTR4_WORD1_SPACE_USED) != 0) {
		/* Round the byte count up to whole 512-byte units. */
		inode->i_blocks = (fattr->du.nfs3.used + 511) >> 9;
	}
	if ((fattr->bitmap[1] & FATTR4_WORD1_TIME_METADATA) != 0) {
		inode->i_ctime = fattr->ctime;
	}
	if ((fattr->bitmap[1] & FATTR4_WORD1_TIME_MODIFY) != 0) {
		inode->i_mtime = fattr->mtime;
	}
}
static void
nfs4_restart_write(struct rpc_task *task)
{
......@@ -1617,7 +1591,6 @@ nfs4_write_done(struct rpc_task *task)
}
if (task->tk_status >= 0)
renew_lease(NFS_SERVER(inode), data->timestamp);
nfs4_write_refresh_inode(inode, data->res.fattr);
/* Call back common NFS writeback processing */
nfs_writeback_done(task);
}
......@@ -1684,7 +1657,6 @@ nfs4_commit_done(struct rpc_task *task)
task->tk_action = nfs4_restart_write;
return;
}
nfs4_write_refresh_inode(inode, data->res.fattr);
/* Call back common NFS writeback processing */
nfs_commit_done(task);
}
......@@ -1807,6 +1779,7 @@ nfs4_proc_file_open(struct inode *inode, struct file *filp)
if (filp->f_mode & FMODE_WRITE) {
lock_kernel();
nfs_set_mmcred(inode, state->owner->so_cred);
nfs_begin_data_update(inode);
unlock_kernel();
}
filp->private_data = state;
......@@ -1823,6 +1796,11 @@ nfs4_proc_file_release(struct inode *inode, struct file *filp)
if (state)
nfs4_close_state(state, filp->f_mode);
if (filp->f_mode & FMODE_WRITE) {
lock_kernel();
nfs_end_data_update(inode);
unlock_kernel();
}
return 0;
}
......
......@@ -49,18 +49,6 @@
extern struct rpc_procinfo nfs_procedures[];
/*
 * Refresh the inode from the attributes carried by a write reply.
 *
 * When fattr->valid lacks NFS_ATTR_WCC, the pre_size/pre_mtime/pre_ctime
 * fields are seeded from the values currently cached on the inode and
 * the flag is set; nfs_refresh_inode() is then called in every case.
 */
static void
nfs_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
{
	int have_wcc = (fattr->valid & NFS_ATTR_WCC) != 0;

	if (!have_wcc) {
		/* Substitute the cached values for the missing pre-op data. */
		fattr->pre_size = NFS_CACHE_ISIZE(inode);
		fattr->pre_mtime = NFS_CACHE_MTIME(inode);
		fattr->pre_ctime = NFS_CACHE_CTIME(inode);
		fattr->valid |= NFS_ATTR_WCC;
	}

	nfs_refresh_inode(inode, fattr);
}
static struct rpc_cred *
nfs_cred(struct inode *inode, struct file *filp)
{
......@@ -205,7 +193,7 @@ nfs_proc_write(struct nfs_write_data *wdata, struct file *filp)
msg.rpc_cred = nfs_cred(inode, filp);
status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
if (status >= 0) {
nfs_write_refresh_inode(inode, fattr);
nfs_refresh_inode(inode, fattr);
wdata->res.count = wdata->args.count;
wdata->verf.committed = NFS_FILE_SYNC;
}
......@@ -331,10 +319,8 @@ nfs_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
{
struct rpc_message *msg = &task->tk_msg;
if (msg->rpc_argp) {
NFS_CACHEINV(dir->d_inode);
if (msg->rpc_argp)
kfree(msg->rpc_argp);
}
return 0;
}
......@@ -584,7 +570,7 @@ nfs_write_done(struct rpc_task *task)
struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
if (task->tk_status >= 0)
nfs_write_refresh_inode(data->inode, data->res.fattr);
nfs_refresh_inode(data->inode, data->res.fattr);
nfs_writeback_done(task);
}
......
......@@ -124,6 +124,7 @@ nfs_readpage_sync(struct file *file, struct inode *inode, struct page *page)
if (result < rdata.args.count) /* NFSv2ism */
break;
} while (count);
NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
if (count)
memclear_highpage_flush(page, rdata.args.pgbase, count);
......@@ -266,6 +267,7 @@ nfs_readpage_result(struct rpc_task *task)
dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
task->tk_pid, task->tk_status);
NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME;
while (!list_empty(&data->pages)) {
struct nfs_page *req = nfs_list_entry(data->pages.next);
struct page *page = req->wb_page;
......
......@@ -104,6 +104,7 @@ nfs_async_unlink_init(struct rpc_task *task)
status = NFS_PROTO(dir->d_inode)->unlink_setup(&msg, dir, &data->name);
if (status < 0)
goto out_err;
nfs_begin_data_update(dir->d_inode);
rpc_call_setup(task, &msg, 0);
return;
out_err:
......@@ -126,7 +127,7 @@ nfs_async_unlink_done(struct rpc_task *task)
if (!dir)
return;
dir_i = dir->d_inode;
nfs_zap_caches(dir_i);
nfs_end_data_update(dir_i);
if (NFS_PROTO(dir_i)->unlink_done(dir, task))
return;
put_rpccred(data->cred);
......
......@@ -157,6 +157,7 @@ nfs_writepage_sync(struct file *file, struct inode *inode, struct page *page,
(long long)NFS_FILEID(inode),
count, (long long)(page_offset(page) + offset));
nfs_begin_data_update(inode);
do {
if (count < wsize && !swapfile)
wdata.args.count = count;
......@@ -190,15 +191,15 @@ nfs_writepage_sync(struct file *file, struct inode *inode, struct page *page,
ClearPageError(page);
io_error:
nfs_end_data_update(inode);
if (wdata.cred)
put_rpccred(wdata.cred);
return written ? written : result;
}
static int
nfs_writepage_async(struct file *file, struct inode *inode, struct page *page,
unsigned int offset, unsigned int count)
static int nfs_writepage_async(struct file *file, struct inode *inode,
struct page *page, unsigned int offset, unsigned int count)
{
struct nfs_page *req;
loff_t end;
......@@ -213,7 +214,6 @@ nfs_writepage_async(struct file *file, struct inode *inode, struct page *page,
end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
if (i_size_read(inode) < end)
i_size_write(inode, end);
out:
return status;
}
......@@ -312,8 +312,10 @@ nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
BUG_ON(error == -EEXIST);
if (error)
return error;
if (!nfsi->npages)
if (!nfsi->npages) {
igrab(inode);
nfs_begin_data_update(inode);
}
nfsi->npages++;
req->wb_count++;
return 0;
......@@ -336,6 +338,7 @@ nfs_inode_remove_request(struct nfs_page *req)
nfsi->npages--;
if (!nfsi->npages) {
spin_unlock(&nfs_wreq_lock);
nfs_end_data_update(inode);
iput(inode);
} else
spin_unlock(&nfs_wreq_lock);
......@@ -891,10 +894,7 @@ nfs_writeback_done(struct rpc_task *task)
#endif
/*
* Update attributes as result of writeback.
* FIXME: There is an inherent race with invalidate_inode_pages and
* writebacks since the page->count is kept > 1 for as long
* as the page has a write request pending.
* Process the nfs_page list
*/
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
......
......@@ -138,6 +138,7 @@ extern int leases_enable, dir_notify_enable, lease_break_time;
#define S_DEAD 32 /* removed, but still open directory */
#define S_NOQUOTA 64 /* Inode is not counted to quota */
#define S_DIRSYNC 128 /* Directory modifications are synchronous */
#define S_NOCMTIME 256 /* Do not update file c/mtime */
/*
* Note that nosuid etc flags are inode-specific: setting some file-system
......@@ -171,6 +172,7 @@ extern int leases_enable, dir_notify_enable, lease_break_time;
#define IS_ONE_SECOND(inode) __IS_FLG(inode, MS_ONE_SECOND)
#define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
#define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME)
/* the read-only stuff doesn't really belong here, but any other place is
probably as bad and I don't want to create yet another include file. */
......
......@@ -99,7 +99,7 @@ struct nfs_inode {
/*
* Various flags
*/
unsigned short flags;
unsigned int flags;
/*
* read_cache_jiffies is when we started read-caching this inode,
......@@ -118,19 +118,22 @@ struct nfs_inode {
*
* mtime != read_cache_mtime
*/
unsigned long readdir_timestamp;
unsigned long read_cache_jiffies;
struct timespec read_cache_ctime;
struct timespec read_cache_mtime;
__u64 read_cache_isize;
unsigned long attrtimeo;
unsigned long attrtimeo_timestamp;
__u64 change_attr; /* v4 only */
/* "Generation counter" for the attribute cache. This is
* bumped whenever we update the metadata on the
* server.
*/
unsigned long cache_change_attribute;
/*
* Timestamp that dates the change made to read_cache_mtime.
* This is of use for dentry revalidation
* Counter indicating the number of outstanding requests that
* will cause a file data update.
*/
unsigned long cache_mtime_jiffies;
atomic_t data_updates;
struct nfs_access_cache cache_access;
......@@ -170,7 +173,9 @@ struct nfs_inode {
#define NFS_INO_STALE 0x0001 /* possible stale inode */
#define NFS_INO_ADVISE_RDPLUS 0x0002 /* advise readdirplus */
#define NFS_INO_REVALIDATING 0x0004 /* revalidating attrs */
#define NFS_INO_FLUSH 0x0008 /* inode is due for flushing */
#define NFS_INO_INVALID_ATTR 0x0008 /* cached attrs are invalid */
#define NFS_INO_INVALID_DATA 0x0010 /* cached data is invalid */
#define NFS_INO_INVALID_ATIME 0x0020 /* cached atime is invalid */
#define NFS_INO_FAKE_ROOT 0x0080 /* root inode placeholder */
static inline struct nfs_inode *NFS_I(struct inode *inode)
......@@ -186,15 +191,7 @@ static inline struct nfs_inode *NFS_I(struct inode *inode)
#define NFS_ADDR(inode) (RPC_PEERADDR(NFS_CLIENT(inode)))
#define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf)
#define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies)
#define NFS_MTIME_UPDATE(inode) (NFS_I(inode)->cache_mtime_jiffies)
#define NFS_CACHE_CTIME(inode) (NFS_I(inode)->read_cache_ctime)
#define NFS_CACHE_MTIME(inode) (NFS_I(inode)->read_cache_mtime)
#define NFS_CACHE_ISIZE(inode) (NFS_I(inode)->read_cache_isize)
#define NFS_CHANGE_ATTR(inode) (NFS_I(inode)->change_attr)
#define NFS_CACHEINV(inode) \
do { \
NFS_READTIME(inode) = jiffies - NFS_MAXATTRTIMEO(inode) - 1; \
} while (0)
#define NFS_ATTRTIMEO(inode) (NFS_I(inode)->attrtimeo)
#define NFS_MINATTRTIMEO(inode) \
(S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmin \
......@@ -211,6 +208,17 @@ do { \
#define NFS_FILEID(inode) (NFS_I(inode)->fileid)
static inline int nfs_caches_unstable(struct inode *inode)
{
return atomic_read(&NFS_I(inode)->data_updates) != 0;
}
/*
 * Flag the cached attributes as invalid (NFS_INO_INVALID_ATTR).
 * The flags are left untouched while nfs_caches_unstable() reports
 * that updates are still outstanding on the inode.
 */
static inline void NFS_CACHEINV(struct inode *inode)
{
	if (nfs_caches_unstable(inode))
		return;

	NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR;
}
static inline int nfs_server_capable(struct inode *inode, int cap)
{
return NFS_SERVER(inode)->caps & cap;
......@@ -227,13 +235,37 @@ loff_t page_offset(struct page *page)
return ((loff_t)page->index) << PAGE_CACHE_SHIFT;
}
/**
 * nfs_save_change_attribute - Returns the inode attribute change cookie
 * @inode: pointer to inode
 *
 * The "change attribute" is updated every time we finish an operation
 * that will result in a metadata change on the server.
 *
 * The cookie is returned as unsigned long, the same type as the
 * cache_change_attribute field it is read from and as the @chattr
 * argument of nfs_verify_change_attribute(), so the value does not pass
 * through a signed type between save and verify.
 */
static inline unsigned long nfs_save_change_attribute(struct inode *inode)
{
	return NFS_I(inode)->cache_change_attribute;
}
/**
 * nfs_verify_change_attribute - Detects NFS inode cache updates
 * @inode: pointer to inode
 * @chattr: previously saved change attribute
 *
 * Returns 0 ("false") if metadata has been updated, or is in the process
 * of being updated, since the change attribute was saved; returns 1 when
 * no update is outstanding and @chattr still equals the inode's current
 * cache_change_attribute.
 */
static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long chattr)
{
	/* An update in flight means the saved cookie cannot be trusted. */
	if (nfs_caches_unstable(inode))
		return 0;

	return NFS_I(inode)->cache_change_attribute == chattr;
}
/*
* linux/fs/nfs/inode.c
*/
extern void nfs_zap_caches(struct inode *);
extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
struct nfs_fattr *);
extern int __nfs_refresh_inode(struct inode *, struct nfs_fattr *);
extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
extern int nfs_permission(struct inode *, int, struct nameidata *);
extern void nfs_set_mmcred(struct inode *, struct rpc_cred *);
......@@ -241,6 +273,10 @@ extern int nfs_open(struct inode *, struct file *);
extern int nfs_release(struct inode *, struct file *);
extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
extern int nfs_setattr(struct dentry *, struct iattr *);
extern void nfs_begin_attr_update(struct inode *);
extern void nfs_end_attr_update(struct inode *);
extern void nfs_begin_data_update(struct inode *);
extern void nfs_end_data_update(struct inode *);
/*
* linux/fs/nfs/file.c
......@@ -383,20 +419,27 @@ extern int nfsroot_mount(struct sockaddr_in *, char *, struct nfs_fh *,
/*
* inline functions
*/
static inline int
nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
static inline int nfs_attribute_timeout(struct inode *inode)
{
if (time_before(jiffies, NFS_READTIME(inode)+NFS_ATTRTIMEO(inode)))
return NFS_STALE(inode) ? -ESTALE : 0;
return __nfs_revalidate_inode(server, inode);
struct nfs_inode *nfsi = NFS_I(inode);
return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo);
}
static inline int
nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
/**
* nfs_revalidate_inode - Revalidate the inode attributes
* @server - pointer to nfs_server struct
* @inode - pointer to inode struct
*
* Updates inode attribute information by retrieving the data from the server.
*/
static inline int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
{
if ((fattr->valid & NFS_ATTR_FATTR) == 0)
return 0;
return __nfs_refresh_inode(inode,fattr);
if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
&& !nfs_attribute_timeout(inode))
return NFS_STALE(inode) ? -ESTALE : 0;
return __nfs_revalidate_inode(server, inode);
}
static inline loff_t
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment