Commit 22f96b38 authored by Jens Axboe's avatar Jens Axboe

fs: add sync_file_range() helper

This just pulls out the ksys_sync_file_range() code to work on a struct
file instead of an fd, so we can use it elsewhere.
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent de0617e4
...@@ -234,58 +234,10 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) ...@@ -234,58 +234,10 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
return do_fsync(fd, 1); return do_fsync(fd, 1);
} }
/* int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
* sys_sync_file_range() permits finely controlled syncing over a segment of
* a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
* zero then sys_sync_file_range() will operate from offset out to EOF.
*
* The flag bits are:
*
* SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range
* before performing the write.
*
* SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
* range which are not presently under writeback. Note that this may block for
* significant periods due to exhaustion of disk request structures.
*
* SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
* after performing the write.
*
* Useful combinations of the flag bits are:
*
* SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
* in the range which were dirty on entry to sys_sync_file_range() are placed
* under writeout. This is a start-write-for-data-integrity operation.
*
* SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
* are not presently under writeout. This is an asynchronous flush-to-disk
* operation. Not suitable for data integrity operations.
*
* SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for
* completion of writeout of all pages in the range. This will be used after an
* earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait
* for that operation to complete and to return the result.
*
* SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER:
* a traditional sync() operation. This is a write-for-data-integrity operation
* which will ensure that all pages in the range which were dirty on entry to
* sys_sync_file_range() are committed to disk.
*
*
* SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
* I/O errors or ENOSPC conditions and will return those to the caller, after
* clearing the EIO and ENOSPC flags in the address_space.
*
* It should be noted that none of these operations write out the file's
* metadata. So unless the application is strictly performing overwrites of
* already-instantiated disk blocks, there are no guarantees here that the data
* will be available after a crash.
*/
int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
unsigned int flags) unsigned int flags)
{ {
int ret; int ret;
struct fd f;
struct address_space *mapping; struct address_space *mapping;
loff_t endbyte; /* inclusive */ loff_t endbyte; /* inclusive */
umode_t i_mode; umode_t i_mode;
...@@ -325,41 +277,96 @@ int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes, ...@@ -325,41 +277,96 @@ int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
else else
endbyte--; /* inclusive */ endbyte--; /* inclusive */
ret = -EBADF; i_mode = file_inode(file)->i_mode;
f = fdget(fd);
if (!f.file)
goto out;
i_mode = file_inode(f.file)->i_mode;
ret = -ESPIPE; ret = -ESPIPE;
if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) &&
!S_ISLNK(i_mode)) !S_ISLNK(i_mode))
goto out_put; goto out;
mapping = f.file->f_mapping; mapping = file->f_mapping;
ret = 0; ret = 0;
if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) { if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
ret = file_fdatawait_range(f.file, offset, endbyte); ret = file_fdatawait_range(file, offset, endbyte);
if (ret < 0) if (ret < 0)
goto out_put; goto out;
} }
if (flags & SYNC_FILE_RANGE_WRITE) { if (flags & SYNC_FILE_RANGE_WRITE) {
ret = __filemap_fdatawrite_range(mapping, offset, endbyte, ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
WB_SYNC_NONE); WB_SYNC_NONE);
if (ret < 0) if (ret < 0)
goto out_put; goto out;
} }
if (flags & SYNC_FILE_RANGE_WAIT_AFTER) if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
ret = file_fdatawait_range(f.file, offset, endbyte); ret = file_fdatawait_range(file, offset, endbyte);
out_put:
fdput(f);
out: out:
return ret; return ret;
} }
/*
* sys_sync_file_range() permits finely controlled syncing over a segment of
* a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
* zero then sys_sync_file_range() will operate from offset out to EOF.
*
* The flag bits are:
*
* SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range
* before performing the write.
*
* SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
* range which are not presently under writeback. Note that this may block for
* significant periods due to exhaustion of disk request structures.
*
* SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
* after performing the write.
*
* Useful combinations of the flag bits are:
*
* SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages
* in the range which were dirty on entry to sys_sync_file_range() are placed
* under writeout. This is a start-write-for-data-integrity operation.
*
* SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which
* are not presently under writeout. This is an asynchronous flush-to-disk
* operation. Not suitable for data integrity operations.
*
* SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for
* completion of writeout of all pages in the range. This will be used after an
* earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait
* for that operation to complete and to return the result.
*
* SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER:
* a traditional sync() operation. This is a write-for-data-integrity operation
* which will ensure that all pages in the range which were dirty on entry to
* sys_sync_file_range() are committed to disk.
*
*
* SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any
* I/O errors or ENOSPC conditions and will return those to the caller, after
* clearing the EIO and ENOSPC flags in the address_space.
*
* It should be noted that none of these operations write out the file's
* metadata. So unless the application is strictly performing overwrites of
* already-instantiated disk blocks, there are no guarantees here that the data
* will be available after a crash.
*/
int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
unsigned int flags)
{
int ret;
struct fd f;
ret = -EBADF;
f = fdget(fd);
if (f.file)
ret = sync_file_range(f.file, offset, nbytes, flags);
fdput(f);
return ret;
}
SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes, SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
unsigned int, flags) unsigned int, flags)
{ {
......
...@@ -2785,6 +2785,9 @@ extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, ...@@ -2785,6 +2785,9 @@ extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
int datasync); int datasync);
extern int vfs_fsync(struct file *file, int datasync); extern int vfs_fsync(struct file *file, int datasync);
extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
unsigned int flags);
/* /*
* Sync the bytes written if this was a synchronous write. Expect ki_pos * Sync the bytes written if this was a synchronous write. Expect ki_pos
* to already be updated for the write, and will return either the amount * to already be updated for the write, and will return either the amount
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment