Commit 1c000719 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] msync correctness

A forward port.  At present, msync() does not report errors
from EIO or ENOSPC.  fsync() has the same bug for mapped pages
against the affected fd.

The patch correctly propagates these errors back up from
writepage so that fsync and msync correctly report errors.

It's fairly important - msync is the only way we have
of reporting ENOSPC against sparse mappings.

Of course, you can still silently lose your data if it's kswapd who
gets ENOSPC during writepage.  I have 3/4 of a patch for that.  It
records the data loss so that a later msync() will report the bad
news.

This patch also adds an implementation of msync(MS_ASYNC), because
it was easy.
parent 6c5e0f13
......@@ -195,11 +195,15 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
static int __block_fsync(struct inode * inode)
{
int ret;
filemap_fdatasync(inode->i_mapping);
ret = sync_buffers(inode->i_rdev, 1);
filemap_fdatawait(inode->i_mapping);
int ret, err;
ret = filemap_fdatasync(inode->i_mapping);
err = sync_buffers(inode->i_rdev, 1);
if (err && !ret)
ret = err;
err = filemap_fdatawait(inode->i_mapping);
if (err && !ret)
ret = err;
return ret;
}
......
......@@ -410,9 +410,9 @@ asmlinkage long sys_fsync(unsigned int fd)
struct file * file;
struct dentry * dentry;
struct inode * inode;
int err;
int ret, err;
err = -EBADF;
ret = -EBADF;
file = fget(fd);
if (!file)
goto out;
......@@ -420,21 +420,27 @@ asmlinkage long sys_fsync(unsigned int fd)
dentry = file->f_dentry;
inode = dentry->d_inode;
err = -EINVAL;
if (!file->f_op || !file->f_op->fsync)
ret = -EINVAL;
if (!file->f_op || !file->f_op->fsync) {
/* Why? We can still call filemap_fdatasync */
goto out_putf;
}
/* We need to protect against concurrent writers.. */
down(&inode->i_sem);
filemap_fdatasync(inode->i_mapping);
ret = filemap_fdatasync(inode->i_mapping);
err = file->f_op->fsync(file, dentry, 0);
filemap_fdatawait(inode->i_mapping);
if (err && !ret)
ret = err;
err = filemap_fdatawait(inode->i_mapping);
if (err && !ret)
ret = err;
up(&inode->i_sem);
out_putf:
fput(file);
out:
return err;
return ret;
}
asmlinkage long sys_fdatasync(unsigned int fd)
......@@ -442,9 +448,9 @@ asmlinkage long sys_fdatasync(unsigned int fd)
struct file * file;
struct dentry * dentry;
struct inode * inode;
int err;
int ret, err;
err = -EBADF;
ret = -EBADF;
file = fget(fd);
if (!file)
goto out;
......@@ -452,20 +458,24 @@ asmlinkage long sys_fdatasync(unsigned int fd)
dentry = file->f_dentry;
inode = dentry->d_inode;
err = -EINVAL;
ret = -EINVAL;
if (!file->f_op || !file->f_op->fsync)
goto out_putf;
down(&inode->i_sem);
filemap_fdatasync(inode->i_mapping);
ret = filemap_fdatasync(inode->i_mapping);
err = file->f_op->fsync(file, dentry, 1);
filemap_fdatawait(inode->i_mapping);
if (err && !ret)
ret = err;
err = filemap_fdatawait(inode->i_mapping);
if (err && !ret)
ret = err;
up(&inode->i_sem);
out_putf:
fput(file);
out:
return err;
return ret;
}
/* After several hours of tedious analysis, the following hash
......
......@@ -244,6 +244,7 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode * inode = filp->f_dentry->d_inode;
int status = 0;
int status2;
dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n",
inode->i_sb->s_id, inode->i_ino,
......@@ -278,11 +279,15 @@ nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
* Flush all pending writes before doing anything
* with locks..
*/
filemap_fdatasync(inode->i_mapping);
status = filemap_fdatasync(inode->i_mapping);
down(&inode->i_sem);
status = nfs_wb_all(inode);
status2 = nfs_wb_all(inode);
if (status2 && !status)
status = status2;
up(&inode->i_sem);
filemap_fdatawait(inode->i_mapping);
status2 = filemap_fdatawait(inode->i_mapping);
if (status2 && !status)
status = status2;
if (status < 0)
return status;
......
......@@ -1230,8 +1230,8 @@ static inline int fsync_inode_data_buffers(struct inode *inode)
return fsync_buffers_list(&inode->i_dirty_data_buffers);
}
extern int inode_has_buffers(struct inode *);
extern void filemap_fdatasync(struct address_space *);
extern void filemap_fdatawait(struct address_space *);
extern int filemap_fdatasync(struct address_space *);
extern int filemap_fdatawait(struct address_space *);
extern void sync_supers(kdev_t);
extern int bmap(struct inode *, int);
extern int notify_change(struct dentry *, struct iattr *);
......
......@@ -542,8 +542,9 @@ EXPORT_SYMBOL(fail_writepage);
* @mapping: address space structure to write
*
*/
void filemap_fdatasync(struct address_space * mapping)
int filemap_fdatasync(struct address_space * mapping)
{
int ret = 0;
int (*writepage)(struct page *) = mapping->a_ops->writepage;
spin_lock(&pagecache_lock);
......@@ -563,8 +564,11 @@ void filemap_fdatasync(struct address_space * mapping)
lock_page(page);
if (PageDirty(page)) {
int err;
ClearPageDirty(page);
writepage(page);
err = writepage(page);
if (err && !ret)
ret = err;
} else
UnlockPage(page);
......@@ -572,6 +576,7 @@ void filemap_fdatasync(struct address_space * mapping)
spin_lock(&pagecache_lock);
}
spin_unlock(&pagecache_lock);
return ret;
}
/**
......@@ -581,8 +586,10 @@ void filemap_fdatasync(struct address_space * mapping)
* @mapping: address space structure to wait for
*
*/
void filemap_fdatawait(struct address_space * mapping)
int filemap_fdatawait(struct address_space * mapping)
{
int ret = 0;
spin_lock(&pagecache_lock);
while (!list_empty(&mapping->locked_pages)) {
......@@ -598,11 +605,14 @@ void filemap_fdatawait(struct address_space * mapping)
spin_unlock(&pagecache_lock);
___wait_on_page(page);
if (PageError(page))
ret = -EIO;
page_cache_release(page);
spin_lock(&pagecache_lock);
}
spin_unlock(&pagecache_lock);
return ret;
}
/*
......@@ -1479,12 +1489,14 @@ static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, si
goto out_free;
/*
* Flush to disk exlusively the _data_, metadata must remains
* Flush to disk exclusively the _data_, metadata must remain
* completly asynchronous or performance will go to /dev/null.
*/
filemap_fdatasync(mapping);
retval = fsync_inode_data_buffers(inode);
filemap_fdatawait(mapping);
retval = filemap_fdatasync(mapping);
if (retval == 0)
retval = fsync_inode_data_buffers(inode);
if (retval == 0)
retval = filemap_fdatawait(mapping);
if (retval < 0)
goto out_free;
......@@ -2101,26 +2113,45 @@ int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
* The msync() system call.
*/
/*
* MS_SYNC syncs the entire file - including mappings.
*
* MS_ASYNC initiates writeout of just the dirty mapped data.
* This provides no guarantee of file integrity - things like indirect
* blocks may not have started writeout. MS_ASYNC is primarily useful
* where the application knows that it has finished with the data and
* wishes to intelligently schedule its own I/O traffic.
*/
static int msync_interval(struct vm_area_struct * vma,
unsigned long start, unsigned long end, int flags)
{
int ret = 0;
struct file * file = vma->vm_file;
if (file && (vma->vm_flags & VM_SHARED)) {
int error;
error = filemap_sync(vma, start, end-start, flags);
ret = filemap_sync(vma, start, end-start, flags);
if (!error && (flags & MS_SYNC)) {
if (!ret && (flags & (MS_SYNC|MS_ASYNC))) {
struct inode * inode = file->f_dentry->d_inode;
down(&inode->i_sem);
filemap_fdatasync(inode->i_mapping);
if (file->f_op && file->f_op->fsync)
error = file->f_op->fsync(file, file->f_dentry, 1);
filemap_fdatawait(inode->i_mapping);
ret = filemap_fdatasync(inode->i_mapping);
if (flags & MS_SYNC) {
int err;
if (file->f_op && file->f_op->fsync) {
err = file->f_op->fsync(file, file->f_dentry, 1);
if (err && !ret)
ret = err;
}
err = filemap_fdatawait(inode->i_mapping);
if (err && !ret)
ret = err;
}
up(&inode->i_sem);
}
return error;
}
return 0;
return ret;
}
asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment