Commit cb1410c7 authored by Weston Andros Adamson's avatar Weston Andros Adamson Committed by Trond Myklebust

NFS: fix subtle change in COMMIT behavior

Recent work in the pgio layer made it possible for there to be more than one
request per page. This caused a subtle change in commit behavior, because
write.c:nfs_commit_unstable_pages compares the number of *pages* waiting for
writeback against the number of requests on a commit list to choose when to
send a COMMIT in a non-blocking flush.

This is probably hard to hit in normal operation - you have to be using
rsize/wsize < PAGE_SIZE, or pnfs with lots of boundaries that are not page
aligned to have a noticeable change in behavior.
Signed-off-by: default avatarWeston Andros Adamson <dros@primarydata.com>
Signed-off-by: default avatarTrond Myklebust <trond.myklebust@primarydata.com>
parent 6a74c0c9
...@@ -49,7 +49,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, ...@@ -49,7 +49,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
goto out_iput; goto out_iput;
res->size = i_size_read(inode); res->size = i_size_read(inode);
res->change_attr = delegation->change_attr; res->change_attr = delegation->change_attr;
if (nfsi->npages != 0) if (nfsi->nrequests != 0)
res->change_attr++; res->change_attr++;
res->ctime = inode->i_ctime; res->ctime = inode->i_ctime;
res->mtime = inode->i_mtime; res->mtime = inode->i_mtime;
......
...@@ -1149,7 +1149,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr ...@@ -1149,7 +1149,7 @@ static unsigned long nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr
if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE) if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
&& (fattr->valid & NFS_ATTR_FATTR_SIZE) && (fattr->valid & NFS_ATTR_FATTR_SIZE)
&& i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) && i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)
&& nfsi->npages == 0) { && nfsi->nrequests == 0) {
i_size_write(inode, nfs_size_to_loff_t(fattr->size)); i_size_write(inode, nfs_size_to_loff_t(fattr->size));
ret |= NFS_INO_INVALID_ATTR; ret |= NFS_INO_INVALID_ATTR;
} }
...@@ -1192,7 +1192,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat ...@@ -1192,7 +1192,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if (fattr->valid & NFS_ATTR_FATTR_SIZE) { if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
cur_size = i_size_read(inode); cur_size = i_size_read(inode);
new_isize = nfs_size_to_loff_t(fattr->size); new_isize = nfs_size_to_loff_t(fattr->size);
if (cur_size != new_isize && nfsi->npages == 0) if (cur_size != new_isize && nfsi->nrequests == 0)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
} }
...@@ -1619,7 +1619,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) ...@@ -1619,7 +1619,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (new_isize != cur_isize) { if (new_isize != cur_isize) {
/* Do we perhaps have any outstanding writes, or has /* Do we perhaps have any outstanding writes, or has
* the file grown beyond our last write? */ * the file grown beyond our last write? */
if ((nfsi->npages == 0) || new_isize > cur_isize) { if ((nfsi->nrequests == 0) || new_isize > cur_isize) {
i_size_write(inode, new_isize); i_size_write(inode, new_isize);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
invalid &= ~NFS_INO_REVAL_PAGECACHE; invalid &= ~NFS_INO_REVAL_PAGECACHE;
...@@ -1784,7 +1784,7 @@ static void init_once(void *foo) ...@@ -1784,7 +1784,7 @@ static void init_once(void *foo)
INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
INIT_LIST_HEAD(&nfsi->commit_info.list); INIT_LIST_HEAD(&nfsi->commit_info.list);
nfsi->npages = 0; nfsi->nrequests = 0;
nfsi->commit_info.ncommit = 0; nfsi->commit_info.ncommit = 0;
atomic_set(&nfsi->commit_info.rpcs_out, 0); atomic_set(&nfsi->commit_info.rpcs_out, 0);
atomic_set(&nfsi->silly_count, 1); atomic_set(&nfsi->silly_count, 1);
......
...@@ -258,6 +258,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit) ...@@ -258,6 +258,7 @@ bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
static inline void static inline void
nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
{ {
struct inode *inode;
WARN_ON_ONCE(prev == req); WARN_ON_ONCE(prev == req);
if (!prev) { if (!prev) {
...@@ -276,12 +277,16 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev) ...@@ -276,12 +277,16 @@ nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
* nfs_page_group_destroy is called */ * nfs_page_group_destroy is called */
kref_get(&req->wb_head->wb_kref); kref_get(&req->wb_head->wb_kref);
/* grab extra ref if head request has extra ref from /* grab extra ref and bump the request count if head request
* the write/commit path to handle handoff between write * has extra ref from the write/commit path to handle handoff
* and commit lists */ * between write and commit lists. */
if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) { if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags)) {
inode = page_file_mapping(req->wb_page)->host;
set_bit(PG_INODE_REF, &req->wb_flags); set_bit(PG_INODE_REF, &req->wb_flags);
kref_get(&req->wb_kref); kref_get(&req->wb_kref);
spin_lock(&inode->i_lock);
NFS_I(inode)->nrequests++;
spin_unlock(&inode->i_lock);
} }
} }
} }
......
...@@ -670,7 +670,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) ...@@ -670,7 +670,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
nfs_lock_request(req); nfs_lock_request(req);
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE)) if (!nfsi->nrequests &&
NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
inode->i_version++; inode->i_version++;
/* /*
* Swap-space should not get truncated. Hence no need to plug the race * Swap-space should not get truncated. Hence no need to plug the race
...@@ -681,9 +682,11 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req) ...@@ -681,9 +682,11 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
SetPagePrivate(req->wb_page); SetPagePrivate(req->wb_page);
set_page_private(req->wb_page, (unsigned long)req); set_page_private(req->wb_page, (unsigned long)req);
} }
nfsi->npages++; nfsi->nrequests++;
/* this a head request for a page group - mark it as having an /* this a head request for a page group - mark it as having an
* extra reference so sub groups can follow suit */ * extra reference so sub groups can follow suit.
* This flag also informs pgio layer when to bump nrequests when
* adding subrequests. */
WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags)); WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
kref_get(&req->wb_kref); kref_get(&req->wb_kref);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
...@@ -709,7 +712,11 @@ static void nfs_inode_remove_request(struct nfs_page *req) ...@@ -709,7 +712,11 @@ static void nfs_inode_remove_request(struct nfs_page *req)
wake_up_page(head->wb_page, PG_private); wake_up_page(head->wb_page, PG_private);
clear_bit(PG_MAPPED, &head->wb_flags); clear_bit(PG_MAPPED, &head->wb_flags);
} }
nfsi->npages--; nfsi->nrequests--;
spin_unlock(&inode->i_lock);
} else {
spin_lock(&inode->i_lock);
nfsi->nrequests--;
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
} }
...@@ -1735,7 +1742,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr ...@@ -1735,7 +1742,7 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
/* Don't commit yet if this is a non-blocking flush and there /* Don't commit yet if this is a non-blocking flush and there
* are a lot of outstanding writes for this mapping. * are a lot of outstanding writes for this mapping.
*/ */
if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1)) if (nfsi->commit_info.ncommit <= (nfsi->nrequests >> 1))
goto out_mark_dirty; goto out_mark_dirty;
/* don't wait for the COMMIT response */ /* don't wait for the COMMIT response */
......
...@@ -163,7 +163,7 @@ struct nfs_inode { ...@@ -163,7 +163,7 @@ struct nfs_inode {
*/ */
__be32 cookieverf[2]; __be32 cookieverf[2];
unsigned long npages; unsigned long nrequests;
struct nfs_mds_commit_info commit_info; struct nfs_mds_commit_info commit_info;
/* Open contexts for shared mmap writes */ /* Open contexts for shared mmap writes */
...@@ -520,7 +520,7 @@ extern void nfs_commit_free(struct nfs_commit_data *data); ...@@ -520,7 +520,7 @@ extern void nfs_commit_free(struct nfs_commit_data *data);
static inline int static inline int
nfs_have_writebacks(struct inode *inode) nfs_have_writebacks(struct inode *inode)
{ {
return NFS_I(inode)->npages != 0; return NFS_I(inode)->nrequests != 0;
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment