Commit ec12ac49 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] writeback correctness and efficiency changes

This is a performance and correctness fix against the writeback paths.

The writeback code has competing requirements.  Sometimes it is used
for "memory cleansing": kupdate, bdflush, writer throttling, page
allocator writeback, etc.  And sometimes this same code is used for
data integrity pruposes: fsync, msync, fdatasync, sync, umount, various
other kernel-internal uses.

The problem is: how to handle a dirty buffer or page which is currently
under writeback.

For memory cleansing, we just want to skip that buffer/page and go onto
the next one.  But for sync, we must wait on the old writeback and then
start new writeback.

mpage_writepages() is current correct for cleansing, but incorrect for
sync.  block_write_full_page() is currently correct for sync, but
inefficient for cleansing.

The fix is fairly simple.

- In mpage_writepages(), don't skip the page is it's a sync
operation.

- In block_write_full_page(), skip the buffer if it is a sync
operation.  And return -EAGAIN to tell the caller that the writeout
didn't work out.  The caller must then set the page dirty again and
move it onto mapping->dirty_pages.

This is an extension of the writepage API: writepage can now return
EAGAIN.  There are only three callers, and they have been updated.

fail_writepage() and ext3_writepage() were actually doing this by
hand.  They have been changed to return -EAGAIN.  NTFS will want to
be able to return -EAGAIN from its writepage as well.

- A sticky question is: how to tell the writeout code which mode it
is operating in?  Cleansing or sync?

It's such a tiny code change that I didn't have the heart to go and
propagate a `mode' argument down every instance of writepages() and
writepage() in the kernel.  So I passed it in via current->flags.

Incidentally, the occurrence of a locked-and-dirty buffer in
block_write_full_page() is fairly rare: normally the collision avoidance
happens at the address_space level, via PageWriteback.  But some
mappings (blockdevs, ext3 files, etc) have their dirty buffers written
out via submit_bh().  It is these buffers which can stall
block_write_full_page().

This wart will be pretty intrusive to fix.  ext3 needs to become fully
page-based (ugh.  It's a block-based journalling filesystem, and pages
are unnatural).  blockdev mappings are still written out by buffers
because that's how filesystems use them.  Putting _all_ metadata
(indirects, inodes, superblocks, etc) into standalone address_spaces
would fix that up.

- filemap_fdatawrite() sets PF_SYNC.  So filemap_fdatawrite() is the
kernel function which will start writeback against a mapping for
"data integrity" purposes, whereas the unexported, internal-only
do_writepages() is the writeback function which is used for memory
cleansing.  This difference is the reason why I didn't consolidate
those functions ages ago...

- Lots of code paths had a bogus extra call to filemap_fdatawait(),
which I previously added in a moment of weak-headedness.  They have
all been removed.
parent 8fd3d458
...@@ -307,10 +307,7 @@ asmlinkage long sys_fsync(unsigned int fd) ...@@ -307,10 +307,7 @@ asmlinkage long sys_fsync(unsigned int fd)
/* We need to protect against concurrent writers.. */ /* We need to protect against concurrent writers.. */
down(&inode->i_sem); down(&inode->i_sem);
ret = filemap_fdatawait(inode->i_mapping); ret = filemap_fdatawrite(inode->i_mapping);
err = filemap_fdatawrite(inode->i_mapping);
if (!ret)
ret = err;
err = file->f_op->fsync(file, dentry, 0); err = file->f_op->fsync(file, dentry, 0);
if (!ret) if (!ret)
ret = err; ret = err;
...@@ -345,10 +342,7 @@ asmlinkage long sys_fdatasync(unsigned int fd) ...@@ -345,10 +342,7 @@ asmlinkage long sys_fdatasync(unsigned int fd)
goto out_putf; goto out_putf;
down(&inode->i_sem); down(&inode->i_sem);
ret = filemap_fdatawait(inode->i_mapping); ret = filemap_fdatawrite(inode->i_mapping);
err = filemap_fdatawrite(inode->i_mapping);
if (!ret)
ret = err;
err = file->f_op->fsync(file, dentry, 1); err = file->f_op->fsync(file, dentry, 1);
if (!ret) if (!ret)
ret = err; ret = err;
...@@ -1648,11 +1642,18 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block) ...@@ -1648,11 +1642,18 @@ void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
* the page lock, whoever dirtied the buffers may decide to clean them * the page lock, whoever dirtied the buffers may decide to clean them
* again at any time. We handle that by only looking at the buffer * again at any time. We handle that by only looking at the buffer
* state inside lock_buffer(). * state inside lock_buffer().
*
* If block_write_full_page() is called for regular writeback
* (called_for_sync() is false) then it will return -EAGAIN for a locked
* buffer. This only can happen if someone has written the buffer directly,
* with submit_bh(). At the address_space level PageWriteback prevents this
* contention from occurring.
*/ */
static int __block_write_full_page(struct inode *inode, static int __block_write_full_page(struct inode *inode,
struct page *page, get_block_t *get_block) struct page *page, get_block_t *get_block)
{ {
int err; int err;
int ret = 0;
unsigned long block; unsigned long block;
unsigned long last_block; unsigned long last_block;
struct buffer_head *bh, *head; struct buffer_head *bh, *head;
...@@ -1724,7 +1725,14 @@ static int __block_write_full_page(struct inode *inode, ...@@ -1724,7 +1725,14 @@ static int __block_write_full_page(struct inode *inode,
do { do {
get_bh(bh); get_bh(bh);
if (buffer_mapped(bh) && buffer_dirty(bh)) { if (buffer_mapped(bh) && buffer_dirty(bh)) {
lock_buffer(bh); if (called_for_sync()) {
lock_buffer(bh);
} else {
if (test_set_buffer_locked(bh)) {
ret = -EAGAIN;
continue;
}
}
if (test_clear_buffer_dirty(bh)) { if (test_clear_buffer_dirty(bh)) {
if (!buffer_uptodate(bh)) if (!buffer_uptodate(bh))
buffer_error(); buffer_error();
...@@ -1733,8 +1741,7 @@ static int __block_write_full_page(struct inode *inode, ...@@ -1733,8 +1741,7 @@ static int __block_write_full_page(struct inode *inode,
unlock_buffer(bh); unlock_buffer(bh);
} }
} }
bh = bh->b_this_page; } while ((bh = bh->b_this_page) != head);
} while (bh != head);
BUG_ON(PageWriteback(page)); BUG_ON(PageWriteback(page));
SetPageWriteback(page); /* Keeps try_to_free_buffers() away */ SetPageWriteback(page); /* Keeps try_to_free_buffers() away */
...@@ -1774,6 +1781,8 @@ static int __block_write_full_page(struct inode *inode, ...@@ -1774,6 +1781,8 @@ static int __block_write_full_page(struct inode *inode,
SetPageUptodate(page); SetPageUptodate(page);
end_page_writeback(page); end_page_writeback(page);
} }
if (err == 0)
return ret;
return err; return err;
recover: recover:
/* /*
......
...@@ -1344,10 +1344,11 @@ static int ext3_writepage(struct page *page) ...@@ -1344,10 +1344,11 @@ static int ext3_writepage(struct page *page)
/* /*
* We have to fail this writepage to avoid cross-fs transactions. * We have to fail this writepage to avoid cross-fs transactions.
* Put the page back on mapping->dirty_pages, but leave its buffer's * Return EAGAIN so the caller will the page back on
* dirty state as-is. * mapping->dirty_pages. The page's buffers' dirty state will be left
* as-is.
*/ */
__set_page_dirty_nobuffers(page); ret = -EAGAIN;
unlock_page(page); unlock_page(page);
return ret; return ret;
} }
...@@ -1405,9 +1406,9 @@ ext3_writepages(struct address_space *mapping, int *nr_to_write) ...@@ -1405,9 +1406,9 @@ ext3_writepages(struct address_space *mapping, int *nr_to_write)
} }
struct address_space_operations ext3_writeback_aops = { struct address_space_operations ext3_writeback_aops = {
.readpage = ext3_readpage, /* BKL not held. Don't need */ .readpage = ext3_readpage, /* BKL not held. Don't need */
.readpages = ext3_readpages, /* BKL not held. Don't need */ .readpages = ext3_readpages, /* BKL not held. Don't need */
.writepage = ext3_writepage, /* BKL not held. We take it */ .writepage = ext3_writepage, /* BKL not held. We take it */
.writepages = ext3_writepages, /* BKL not held. Don't need */ .writepages = ext3_writepages, /* BKL not held. Don't need */
.sync_page = block_sync_page, .sync_page = block_sync_page,
.prepare_write = ext3_prepare_write, /* BKL not held. We take it */ .prepare_write = ext3_prepare_write, /* BKL not held. We take it */
......
...@@ -325,7 +325,6 @@ int dbSync(struct inode *ipbmap) ...@@ -325,7 +325,6 @@ int dbSync(struct inode *ipbmap)
/* /*
* write out dirty pages of bmap * write out dirty pages of bmap
*/ */
filemap_fdatawait(ipbmap->i_mapping);
filemap_fdatawrite(ipbmap->i_mapping); filemap_fdatawrite(ipbmap->i_mapping);
filemap_fdatawait(ipbmap->i_mapping); filemap_fdatawait(ipbmap->i_mapping);
......
...@@ -281,7 +281,6 @@ int diSync(struct inode *ipimap) ...@@ -281,7 +281,6 @@ int diSync(struct inode *ipimap)
/* /*
* write out dirty pages of imap * write out dirty pages of imap
*/ */
filemap_fdatawait(ipimap->i_mapping);
filemap_fdatawrite(ipimap->i_mapping); filemap_fdatawrite(ipimap->i_mapping);
filemap_fdatawait(ipimap->i_mapping); filemap_fdatawait(ipimap->i_mapping);
...@@ -595,7 +594,6 @@ void diFreeSpecial(struct inode *ip) ...@@ -595,7 +594,6 @@ void diFreeSpecial(struct inode *ip)
jERROR(1, ("diFreeSpecial called with NULL ip!\n")); jERROR(1, ("diFreeSpecial called with NULL ip!\n"));
return; return;
} }
filemap_fdatawait(ip->i_mapping);
filemap_fdatawrite(ip->i_mapping); filemap_fdatawrite(ip->i_mapping);
filemap_fdatawait(ip->i_mapping); filemap_fdatawait(ip->i_mapping);
truncate_inode_pages(ip->i_mapping, 0); truncate_inode_pages(ip->i_mapping, 0);
......
...@@ -965,9 +965,6 @@ int lmLogSync(log_t * log, int nosyncwait) ...@@ -965,9 +965,6 @@ int lmLogSync(log_t * log, int nosyncwait)
* We need to make sure all of the "written" metapages * We need to make sure all of the "written" metapages
* actually make it to disk * actually make it to disk
*/ */
filemap_fdatawait(sbi->ipbmap->i_mapping);
filemap_fdatawait(sbi->ipimap->i_mapping);
filemap_fdatawait(sbi->direct_inode->i_mapping);
filemap_fdatawrite(sbi->ipbmap->i_mapping); filemap_fdatawrite(sbi->ipbmap->i_mapping);
filemap_fdatawrite(sbi->ipimap->i_mapping); filemap_fdatawrite(sbi->ipimap->i_mapping);
filemap_fdatawrite(sbi->direct_inode->i_mapping); filemap_fdatawrite(sbi->direct_inode->i_mapping);
......
...@@ -1165,7 +1165,6 @@ int txCommit(tid_t tid, /* transaction identifier */ ...@@ -1165,7 +1165,6 @@ int txCommit(tid_t tid, /* transaction identifier */
* *
* if ((!S_ISDIR(ip->i_mode)) * if ((!S_ISDIR(ip->i_mode))
* && (tblk->flag & COMMIT_DELETE) == 0) { * && (tblk->flag & COMMIT_DELETE) == 0) {
* filemap_fdatawait(ip->i_mapping);
* filemap_fdatawrite(ip->i_mapping); * filemap_fdatawrite(ip->i_mapping);
* filemap_fdatawait(ip->i_mapping); * filemap_fdatawait(ip->i_mapping);
* } * }
......
...@@ -112,7 +112,6 @@ int jfs_umount(struct super_block *sb) ...@@ -112,7 +112,6 @@ int jfs_umount(struct super_block *sb)
* Make sure all metadata makes it to disk before we mark * Make sure all metadata makes it to disk before we mark
* the superblock as clean * the superblock as clean
*/ */
filemap_fdatawait(sbi->direct_inode->i_mapping);
filemap_fdatawrite(sbi->direct_inode->i_mapping); filemap_fdatawrite(sbi->direct_inode->i_mapping);
filemap_fdatawait(sbi->direct_inode->i_mapping); filemap_fdatawait(sbi->direct_inode->i_mapping);
...@@ -159,7 +158,6 @@ int jfs_umount_rw(struct super_block *sb) ...@@ -159,7 +158,6 @@ int jfs_umount_rw(struct super_block *sb)
*/ */
dbSync(sbi->ipbmap); dbSync(sbi->ipbmap);
diSync(sbi->ipimap); diSync(sbi->ipimap);
filemap_fdatawait(sbi->direct_inode->i_mapping);
filemap_fdatawrite(sbi->direct_inode->i_mapping); filemap_fdatawrite(sbi->direct_inode->i_mapping);
filemap_fdatawait(sbi->direct_inode->i_mapping); filemap_fdatawait(sbi->direct_inode->i_mapping);
......
...@@ -146,7 +146,6 @@ static void jfs_put_super(struct super_block *sb) ...@@ -146,7 +146,6 @@ static void jfs_put_super(struct super_block *sb)
* We need to clean out the direct_inode pages since this inode * We need to clean out the direct_inode pages since this inode
* is not in the inode hash. * is not in the inode hash.
*/ */
filemap_fdatawait(sbi->direct_inode->i_mapping);
filemap_fdatawrite(sbi->direct_inode->i_mapping); filemap_fdatawrite(sbi->direct_inode->i_mapping);
filemap_fdatawait(sbi->direct_inode->i_mapping); filemap_fdatawait(sbi->direct_inode->i_mapping);
truncate_inode_pages(sbi->direct_mapping, 0); truncate_inode_pages(sbi->direct_mapping, 0);
...@@ -362,7 +361,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) ...@@ -362,7 +361,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
jERROR(1, ("jfs_umount failed with return code %d\n", rc)); jERROR(1, ("jfs_umount failed with return code %d\n", rc));
} }
out_mount_failed: out_mount_failed:
filemap_fdatawait(sbi->direct_inode->i_mapping);
filemap_fdatawrite(sbi->direct_inode->i_mapping); filemap_fdatawrite(sbi->direct_inode->i_mapping);
filemap_fdatawait(sbi->direct_inode->i_mapping); filemap_fdatawait(sbi->direct_inode->i_mapping);
truncate_inode_pages(sbi->direct_mapping, 0); truncate_inode_pages(sbi->direct_mapping, 0);
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/prefetch.h> #include <linux/prefetch.h>
#include <linux/mpage.h> #include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/pagevec.h> #include <linux/pagevec.h>
/* /*
...@@ -530,6 +531,7 @@ mpage_writepages(struct address_space *mapping, ...@@ -530,6 +531,7 @@ mpage_writepages(struct address_space *mapping,
sector_t last_block_in_bio = 0; sector_t last_block_in_bio = 0;
int ret = 0; int ret = 0;
int done = 0; int done = 0;
int sync = called_for_sync();
struct pagevec pvec; struct pagevec pvec;
int (*writepage)(struct page *); int (*writepage)(struct page *);
...@@ -546,7 +548,7 @@ mpage_writepages(struct address_space *mapping, ...@@ -546,7 +548,7 @@ mpage_writepages(struct address_space *mapping,
struct page *page = list_entry(mapping->io_pages.prev, struct page *page = list_entry(mapping->io_pages.prev,
struct page, list); struct page, list);
list_del(&page->list); list_del(&page->list);
if (PageWriteback(page)) { if (PageWriteback(page) && !sync) {
if (PageDirty(page)) { if (PageDirty(page)) {
list_add(&page->list, &mapping->dirty_pages); list_add(&page->list, &mapping->dirty_pages);
continue; continue;
...@@ -565,6 +567,9 @@ mpage_writepages(struct address_space *mapping, ...@@ -565,6 +567,9 @@ mpage_writepages(struct address_space *mapping,
lock_page(page); lock_page(page);
if (sync)
wait_on_page_writeback(page);
if (page->mapping && !PageWriteback(page) && if (page->mapping && !PageWriteback(page) &&
TestClearPageDirty(page)) { TestClearPageDirty(page)) {
if (writepage) { if (writepage) {
...@@ -579,6 +584,10 @@ mpage_writepages(struct address_space *mapping, ...@@ -579,6 +584,10 @@ mpage_writepages(struct address_space *mapping,
pagevec_deactivate_inactive(&pvec); pagevec_deactivate_inactive(&pvec);
page = NULL; page = NULL;
} }
if (ret == -EAGAIN && page) {
__set_page_dirty_nobuffers(page);
ret = 0;
}
if (ret || (nr_to_write && --(*nr_to_write) <= 0)) if (ret || (nr_to_write && --(*nr_to_write) <= 0))
done = 1; done = 1;
} else { } else {
......
...@@ -279,10 +279,7 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl) ...@@ -279,10 +279,7 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
* Flush all pending writes before doing anything * Flush all pending writes before doing anything
* with locks.. * with locks..
*/ */
status = filemap_fdatawait(inode->i_mapping); status = filemap_fdatawrite(inode->i_mapping);
status2 = filemap_fdatawrite(inode->i_mapping);
if (!status)
status = status2;
down(&inode->i_sem); down(&inode->i_sem);
status2 = nfs_wb_all(inode); status2 = nfs_wb_all(inode);
if (!status) if (!status)
...@@ -308,7 +305,6 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl) ...@@ -308,7 +305,6 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
*/ */
out_ok: out_ok:
if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
filemap_fdatawait(inode->i_mapping);
filemap_fdatawrite(inode->i_mapping); filemap_fdatawrite(inode->i_mapping);
down(&inode->i_sem); down(&inode->i_sem);
nfs_wb_all(inode); /* we may have slept */ nfs_wb_all(inode); /* we may have slept */
......
...@@ -775,7 +775,6 @@ printk("nfs_setattr: revalidate failed, error=%d\n", error); ...@@ -775,7 +775,6 @@ printk("nfs_setattr: revalidate failed, error=%d\n", error);
if (!S_ISREG(inode->i_mode)) if (!S_ISREG(inode->i_mode))
attr->ia_valid &= ~ATTR_SIZE; attr->ia_valid &= ~ATTR_SIZE;
filemap_fdatawait(inode->i_mapping);
filemap_fdatawrite(inode->i_mapping); filemap_fdatawrite(inode->i_mapping);
error = nfs_wb_all(inode); error = nfs_wb_all(inode);
filemap_fdatawait(inode->i_mapping); filemap_fdatawait(inode->i_mapping);
......
...@@ -501,7 +501,6 @@ inline void nfsd_dosync(struct file *filp, struct dentry *dp, ...@@ -501,7 +501,6 @@ inline void nfsd_dosync(struct file *filp, struct dentry *dp,
struct inode *inode = dp->d_inode; struct inode *inode = dp->d_inode;
int (*fsync) (struct file *, struct dentry *, int); int (*fsync) (struct file *, struct dentry *, int);
filemap_fdatawait(inode->i_mapping);
filemap_fdatawrite(inode->i_mapping); filemap_fdatawrite(inode->i_mapping);
if (fop && (fsync = fop->fsync)) if (fop && (fsync = fop->fsync))
fsync(filp, dp, 0); fsync(filp, dp, 0);
......
...@@ -352,7 +352,6 @@ smb_file_release(struct inode *inode, struct file * file) ...@@ -352,7 +352,6 @@ smb_file_release(struct inode *inode, struct file * file)
/* We must flush any dirty pages now as we won't be able to /* We must flush any dirty pages now as we won't be able to
write anything after close. mmap can trigger this. write anything after close. mmap can trigger this.
"openers" should perhaps include mmap'ers ... */ "openers" should perhaps include mmap'ers ... */
filemap_fdatawait(inode->i_mapping);
filemap_fdatawrite(inode->i_mapping); filemap_fdatawrite(inode->i_mapping);
filemap_fdatawait(inode->i_mapping); filemap_fdatawait(inode->i_mapping);
smb_close(inode); smb_close(inode);
......
...@@ -650,7 +650,6 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr) ...@@ -650,7 +650,6 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr)
DENTRY_PATH(dentry), DENTRY_PATH(dentry),
(long) inode->i_size, (long) attr->ia_size); (long) inode->i_size, (long) attr->ia_size);
filemap_fdatawait(inode->i_mapping);
filemap_fdatawrite(inode->i_mapping); filemap_fdatawrite(inode->i_mapping);
filemap_fdatawait(inode->i_mapping); filemap_fdatawait(inode->i_mapping);
......
...@@ -208,7 +208,8 @@ void udf_expand_file_adinicb(struct inode * inode, int newsize, int * err) ...@@ -208,7 +208,8 @@ void udf_expand_file_adinicb(struct inode * inode, int newsize, int * err)
mark_buffer_dirty_inode(bh, inode); mark_buffer_dirty_inode(bh, inode);
udf_release_data(bh); udf_release_data(bh);
inode->i_data.a_ops->writepage(page); if (inode->i_data.a_ops->writepage(page) == -EAGAIN)
__set_page_dirty_nobuffers(page);
page_cache_release(page); page_cache_release(page);
mark_inode_dirty(inode); mark_inode_dirty(inode);
......
...@@ -405,6 +405,7 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0) ...@@ -405,6 +405,7 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0)
#define PF_FREEZE 0x00010000 /* this task should be frozen for suspend */ #define PF_FREEZE 0x00010000 /* this task should be frozen for suspend */
#define PF_IOTHREAD 0x00020000 /* this thread is needed for doing I/O to swap */ #define PF_IOTHREAD 0x00020000 /* this thread is needed for doing I/O to swap */
#define PF_FROZEN 0x00040000 /* frozen for system suspend */ #define PF_FROZEN 0x00040000 /* frozen for system suspend */
#define PF_SYNC 0x00080000 /* performing fsync(), etc */
/* /*
* Ptrace flags * Ptrace flags
......
...@@ -72,4 +72,13 @@ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl ...@@ -72,4 +72,13 @@ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl
read-only. */ read-only. */
/*
* Tell the writeback paths that they are being called for a "data integrity"
* operation such as fsync().
*/
static inline int called_for_sync(void)
{
return current->flags & PF_SYNC;
}
#endif /* WRITEBACK_H */ #endif /* WRITEBACK_H */
...@@ -464,31 +464,38 @@ int fail_writepage(struct page *page) ...@@ -464,31 +464,38 @@ int fail_writepage(struct page *page)
SetPageReferenced(page); SetPageReferenced(page);
} }
/* Set the page dirty again, unlock */
set_page_dirty(page);
unlock_page(page); unlock_page(page);
return 0; return -EAGAIN; /* It will be set dirty again */
} }
EXPORT_SYMBOL(fail_writepage); EXPORT_SYMBOL(fail_writepage);
/** /**
* filemap_fdatawrite - walk the list of dirty pages of the given address space * filemap_fdatawrite - start writeback against all of a mapping's dirty pages
* and writepage() all of them. * @mapping: address space structure to write
* *
* @mapping: address space structure to write * This is a "data integrity" operation, as opposed to a regular memory
* cleansing writeback. The difference between these two operations is that
* if a dirty page/buffer is encountered, it must be waited upon, and not just
* skipped over.
* *
* The PF_SYNC flag is set across this operation and the various functions
* which care about this distinction must use called_for_sync() to find out
* which behaviour they should implement.
*/ */
int filemap_fdatawrite(struct address_space *mapping) int filemap_fdatawrite(struct address_space *mapping)
{ {
return do_writepages(mapping, NULL); int ret;
current->flags |= PF_SYNC;
ret = do_writepages(mapping, NULL);
current->flags &= ~PF_SYNC;
return ret;
} }
/** /**
* filemap_fdatawait - walk the list of locked pages of the given address space * filemap_fdatawait - walk the list of locked pages of the given address
* and wait for all of them. * space and wait for all of them.
* * @mapping: address space structure to wait for
* @mapping: address space structure to wait for
*
*/ */
int filemap_fdatawait(struct address_space * mapping) int filemap_fdatawait(struct address_space * mapping)
{ {
...@@ -497,8 +504,9 @@ int filemap_fdatawait(struct address_space * mapping) ...@@ -497,8 +504,9 @@ int filemap_fdatawait(struct address_space * mapping)
write_lock(&mapping->page_lock); write_lock(&mapping->page_lock);
while (!list_empty(&mapping->locked_pages)) { while (!list_empty(&mapping->locked_pages)) {
struct page *page = list_entry(mapping->locked_pages.next, struct page, list); struct page *page;
page = list_entry(mapping->locked_pages.next,struct page,list);
list_del(&page->list); list_del(&page->list);
if (PageDirty(page)) if (PageDirty(page))
list_add(&page->list, &mapping->dirty_pages); list_add(&page->list, &mapping->dirty_pages);
......
...@@ -145,10 +145,7 @@ static int msync_interval(struct vm_area_struct * vma, ...@@ -145,10 +145,7 @@ static int msync_interval(struct vm_area_struct * vma,
int err; int err;
down(&inode->i_sem); down(&inode->i_sem);
ret = filemap_fdatawait(inode->i_mapping); ret = filemap_fdatawrite(inode->i_mapping);
err = filemap_fdatawrite(inode->i_mapping);
if (!ret)
ret = err;
if (flags & MS_SYNC) { if (flags & MS_SYNC) {
if (file->f_op && file->f_op->fsync) { if (file->f_op && file->f_op->fsync) {
err = file->f_op->fsync(file, file->f_dentry, 1); err = file->f_op->fsync(file, file->f_dentry, 1);
......
...@@ -350,10 +350,15 @@ int generic_vm_writeback(struct page *page, int *nr_to_write) ...@@ -350,10 +350,15 @@ int generic_vm_writeback(struct page *page, int *nr_to_write)
#if 0 #if 0
if (!PageWriteback(page) && PageDirty(page)) { if (!PageWriteback(page) && PageDirty(page)) {
lock_page(page); lock_page(page);
if (!PageWriteback(page) && TestClearPageDirty(page)) if (!PageWriteback(page) && TestClearPageDirty(page)) {
page->mapping->a_ops->writepage(page); int ret;
else
ret = page->mapping->a_ops->writepage(page);
if (ret == -EAGAIN)
__set_page_dirty_nobuffers(page);
} else {
unlock_page(page); unlock_page(page);
}
} }
#endif #endif
} }
...@@ -395,6 +400,10 @@ int write_one_page(struct page *page, int wait) ...@@ -395,6 +400,10 @@ int write_one_page(struct page *page, int wait)
page_cache_get(page); page_cache_get(page);
write_unlock(&mapping->page_lock); write_unlock(&mapping->page_lock);
ret = mapping->a_ops->writepage(page); ret = mapping->a_ops->writepage(page);
if (ret == -EAGAIN) {
__set_page_dirty_nobuffers(page);
ret = 0;
}
if (ret == 0 && wait) { if (ret == 0 && wait) {
wait_on_page_writeback(page); wait_on_page_writeback(page);
if (PageError(page)) if (PageError(page))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment