Commit cb843a6f authored by Chris Mason

Btrfs: O_DIRECT writes via buffered writes + invalidate

This reworks the btrfs O_DIRECT write code a bit.  It had always fallen
back to buffered IO and done an invalidate, but needed to be updated
for the data=ordered code.  The invalidate wasn't actually removing pages
because they were still inside an ordered extent.

This also combines the O_DIRECT/O_SYNC paths where possible, and kicks
off IO in the main btrfs_file_write loop to keep the pipe down to the
disk full as we process long writes.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 323ac95b
...@@ -905,6 +905,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ...@@ -905,6 +905,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
struct page *pinned[2]; struct page *pinned[2];
unsigned long first_index; unsigned long first_index;
unsigned long last_index; unsigned long last_index;
int will_write;
will_write = ((file->f_flags & O_SYNC) || IS_SYNC(inode) ||
(file->f_flags & O_DIRECT));
nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
PAGE_CACHE_SIZE / (sizeof(struct page *))); PAGE_CACHE_SIZE / (sizeof(struct page *)));
...@@ -1001,15 +1005,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ...@@ -1001,15 +1005,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
if (ret) if (ret)
goto out; goto out;
if (will_write) {
btrfs_fdatawrite_range(inode->i_mapping, pos,
pos + write_bytes - 1,
WB_SYNC_NONE);
} else {
balance_dirty_pages_ratelimited_nr(inode->i_mapping,
num_pages);
if (num_pages <
(root->leafsize >> PAGE_CACHE_SHIFT) + 1)
btrfs_btree_balance_dirty(root, 1);
btrfs_throttle(root);
}
buf += write_bytes; buf += write_bytes;
count -= write_bytes; count -= write_bytes;
pos += write_bytes; pos += write_bytes;
num_written += write_bytes; num_written += write_bytes;
balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
btrfs_btree_balance_dirty(root, 1);
btrfs_throttle(root);
cond_resched(); cond_resched();
} }
out: out:
...@@ -1023,37 +1036,30 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, ...@@ -1023,37 +1036,30 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
page_cache_release(pinned[1]); page_cache_release(pinned[1]);
*ppos = pos; *ppos = pos;
if (num_written > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { if (num_written > 0 && will_write) {
struct btrfs_trans_handle *trans; struct btrfs_trans_handle *trans;
err = btrfs_fdatawrite_range(inode->i_mapping, start_pos, err = btrfs_wait_ordered_range(inode, start_pos, num_written);
start_pos + num_written -1, if (err)
WB_SYNC_NONE);
if (err < 0)
num_written = err;
err = btrfs_wait_on_page_writeback_range(inode->i_mapping,
start_pos, start_pos + num_written - 1);
if (err < 0)
num_written = err; num_written = err;
if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
trans = btrfs_start_transaction(root, 1); trans = btrfs_start_transaction(root, 1);
ret = btrfs_log_dentry_safe(trans, root, file->f_dentry); ret = btrfs_log_dentry_safe(trans, root,
file->f_dentry);
if (ret == 0) { if (ret == 0) {
btrfs_sync_log(trans, root); btrfs_sync_log(trans, root);
btrfs_end_transaction(trans, root); btrfs_end_transaction(trans, root);
} else { } else {
btrfs_commit_transaction(trans, root); btrfs_commit_transaction(trans, root);
} }
} else if (num_written > 0 && (file->f_flags & O_DIRECT)) { }
do_sync_mapping_range(inode->i_mapping, start_pos, if (file->f_flags & O_DIRECT) {
start_pos + num_written - 1,
SYNC_FILE_RANGE_WRITE |
SYNC_FILE_RANGE_WAIT_AFTER);
invalidate_mapping_pages(inode->i_mapping, invalidate_mapping_pages(inode->i_mapping,
start_pos >> PAGE_CACHE_SHIFT, start_pos >> PAGE_CACHE_SHIFT,
(start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
} }
}
current->backing_dev_info = NULL; current->backing_dev_info = NULL;
return num_written ? num_written : err; return num_written ? num_written : err;
} }
......
...@@ -397,7 +397,7 @@ void btrfs_start_ordered_extent(struct inode *inode, ...@@ -397,7 +397,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
/* /*
* Used to wait on ordered extents across a large range of bytes. * Used to wait on ordered extents across a large range of bytes.
*/ */
void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
{ {
u64 end; u64 end;
u64 orig_end; u64 orig_end;
...@@ -451,6 +451,7 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) ...@@ -451,6 +451,7 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
(unsigned long long)orig_end); (unsigned long long)orig_end);
goto again; goto again;
} }
return 0;
} }
/* /*
......
...@@ -135,7 +135,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, ...@@ -135,7 +135,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
u64 file_offset); u64 file_offset);
void btrfs_start_ordered_extent(struct inode *inode, void btrfs_start_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry, int wait); struct btrfs_ordered_extent *entry, int wait);
void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
struct btrfs_ordered_extent * struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
int btrfs_ordered_update_i_size(struct inode *inode, int btrfs_ordered_update_i_size(struct inode *inode,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment