Commit 74163da7, authored and committed by Kent Overstreet

bcachefs: Fallocate fixes

- fpunch wasn't always correctly updating i_size - when we drop buffered
  writes that were extending a file, we become responsible for writing
  i_size.

- fzero was sometimes zeroing out more data than it should have -
  block_start and block_end were being rounded in the wrong directions.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
parent 0397a2e8
...@@ -2296,6 +2296,14 @@ static int __bch2_truncate_page(struct bch_inode_info *inode, ...@@ -2296,6 +2296,14 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
s->s[i].state = SECTOR_UNALLOCATED; s->s[i].state = SECTOR_UNALLOCATED;
} }
/*
* Caller needs to know whether this page will be written out by
* writeback - doing an i_size update if necessary - or whether it will
* be responsible for the i_size update:
*/
ret = s->s[(min_t(u64, inode->v.i_size - (index << PAGE_SHIFT),
PAGE_SIZE) - 1) >> 9].state >= SECTOR_DIRTY;
zero_user_segment(page, start_offset, end_offset); zero_user_segment(page, start_offset, end_offset);
/* /*
...@@ -2304,8 +2312,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode, ...@@ -2304,8 +2312,7 @@ static int __bch2_truncate_page(struct bch_inode_info *inode,
* XXX: because we aren't currently tracking whether the page has actual * XXX: because we aren't currently tracking whether the page has actual
* data in it (vs. just 0s, or only partially written) this wrong. ick. * data in it (vs. just 0s, or only partially written) this wrong. ick.
*/ */
ret = bch2_get_page_disk_reservation(c, inode, page, false); BUG_ON(bch2_get_page_disk_reservation(c, inode, page, false));
BUG_ON(ret);
/* /*
* This removes any writeable userspace mappings; we need to force * This removes any writeable userspace mappings; we need to force
...@@ -2327,6 +2334,20 @@ static int bch2_truncate_page(struct bch_inode_info *inode, loff_t from) ...@@ -2327,6 +2334,20 @@ static int bch2_truncate_page(struct bch_inode_info *inode, loff_t from)
from, round_up(from, PAGE_SIZE)); from, round_up(from, PAGE_SIZE));
} }
/*
 * Run __bch2_truncate_page() on the page(s) at the edges of the byte range
 * described by @start and @end.
 *
 * The page containing @start is always processed. The page containing @end
 * is processed only when the first call did not return a negative value and
 * @end lies on a different page than @start.
 *
 * Returns the result of the last __bch2_truncate_page() call that was made:
 * a negative value from the first call is returned unchanged, otherwise the
 * value for the final page in the range is returned.
 *
 * NOTE(review): callers in this change store a non-negative result in
 * truncated_last_page and use it to decide whether they must write i_size
 * themselves - confirm the exact meaning against __bch2_truncate_page().
 */
static int bch2_truncate_pages(struct bch_inode_info *inode,
loff_t start, loff_t end)
{
/* Edge page that contains the start of the range: */
int ret = __bch2_truncate_page(inode, start >> PAGE_SHIFT,
start, end);
/* Skip the second call on error, or when both edges share one page: */
if (ret >= 0 &&
start >> PAGE_SHIFT != end >> PAGE_SHIFT)
ret = __bch2_truncate_page(inode,
end >> PAGE_SHIFT,
start, end);
return ret;
}
static int bch2_extend(struct mnt_idmap *idmap, static int bch2_extend(struct mnt_idmap *idmap,
struct bch_inode_info *inode, struct bch_inode_info *inode,
struct bch_inode_unpacked *inode_u, struct bch_inode_unpacked *inode_u,
...@@ -2417,7 +2438,7 @@ int bch2_truncate(struct mnt_idmap *idmap, ...@@ -2417,7 +2438,7 @@ int bch2_truncate(struct mnt_idmap *idmap,
iattr->ia_valid &= ~ATTR_SIZE; iattr->ia_valid &= ~ATTR_SIZE;
ret = bch2_truncate_page(inode, iattr->ia_size); ret = bch2_truncate_page(inode, iattr->ia_size);
if (unlikely(ret)) if (unlikely(ret < 0))
goto err; goto err;
/* /*
...@@ -2483,48 +2504,39 @@ static int inode_update_times_fn(struct bch_inode_info *inode, ...@@ -2483,48 +2504,39 @@ static int inode_update_times_fn(struct bch_inode_info *inode,
static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len) static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len)
{ {
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
u64 discard_start = round_up(offset, block_bytes(c)) >> 9; u64 end = offset + len;
u64 discard_end = round_down(offset + len, block_bytes(c)) >> 9; u64 block_start = round_up(offset, block_bytes(c));
u64 block_end = round_down(end, block_bytes(c));
bool truncated_last_page;
int ret = 0; int ret = 0;
inode_lock(&inode->v); ret = bch2_truncate_pages(inode, offset, end);
inode_dio_wait(&inode->v); if (unlikely(ret < 0))
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
ret = __bch2_truncate_page(inode,
offset >> PAGE_SHIFT,
offset, offset + len);
if (unlikely(ret))
goto err; goto err;
if (offset >> PAGE_SHIFT != truncated_last_page = ret;
(offset + len) >> PAGE_SHIFT) {
ret = __bch2_truncate_page(inode,
(offset + len) >> PAGE_SHIFT,
offset, offset + len);
if (unlikely(ret))
goto err;
}
truncate_pagecache_range(&inode->v, offset, offset + len - 1); truncate_pagecache_range(&inode->v, offset, end - 1);
if (discard_start < discard_end) { if (block_start < block_end ) {
s64 i_sectors_delta = 0; s64 i_sectors_delta = 0;
ret = bch2_fpunch(c, inode_inum(inode), ret = bch2_fpunch(c, inode_inum(inode),
discard_start, discard_end, block_start >> 9, block_end >> 9,
&i_sectors_delta); &i_sectors_delta);
i_sectors_acct(c, inode, NULL, i_sectors_delta); i_sectors_acct(c, inode, NULL, i_sectors_delta);
} }
mutex_lock(&inode->ei_update_lock); mutex_lock(&inode->ei_update_lock);
if (end >= inode->v.i_size && !truncated_last_page) {
ret = bch2_write_inode_size(c, inode, inode->v.i_size,
ATTR_MTIME|ATTR_CTIME);
} else {
ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL, ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
ATTR_MTIME|ATTR_CTIME) ?: ret; ATTR_MTIME|ATTR_CTIME);
}
mutex_unlock(&inode->ei_update_lock); mutex_unlock(&inode->ei_update_lock);
err: err:
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
inode_unlock(&inode->v);
return ret; return ret;
} }
...@@ -2544,31 +2556,18 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, ...@@ -2544,31 +2556,18 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
if ((offset | len) & (block_bytes(c) - 1)) if ((offset | len) & (block_bytes(c) - 1))
return -EINVAL; return -EINVAL;
/*
* We need i_mutex to keep the page cache consistent with the extents
* btree, and the btree consistent with i_size - we don't need outside
* locking for the extents btree itself, because we're using linked
* iterators
*/
inode_lock(&inode->v);
inode_dio_wait(&inode->v);
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
if (insert) { if (insert) {
ret = -EFBIG;
if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len) if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len)
goto err; return -EFBIG;
ret = -EINVAL;
if (offset >= inode->v.i_size) if (offset >= inode->v.i_size)
goto err; return -EINVAL;
src_start = U64_MAX; src_start = U64_MAX;
shift = len; shift = len;
} else { } else {
ret = -EINVAL;
if (offset + len >= inode->v.i_size) if (offset + len >= inode->v.i_size)
goto err; return -EINVAL;
src_start = offset + len; src_start = offset + len;
shift = -len; shift = -len;
...@@ -2578,7 +2577,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, ...@@ -2578,7 +2577,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX); ret = write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX);
if (ret) if (ret)
goto err; return ret;
if (insert) { if (insert) {
i_size_write(&inode->v, new_size); i_size_write(&inode->v, new_size);
...@@ -2595,7 +2594,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, ...@@ -2595,7 +2594,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
i_sectors_acct(c, inode, NULL, i_sectors_delta); i_sectors_acct(c, inode, NULL, i_sectors_delta);
if (ret) if (ret)
goto err; return ret;
} }
bch2_bkey_buf_init(&copy); bch2_bkey_buf_init(&copy);
...@@ -2708,18 +2707,19 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode, ...@@ -2708,18 +2707,19 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
bch2_bkey_buf_exit(&copy, c); bch2_bkey_buf_exit(&copy, c);
if (ret) if (ret)
goto err; return ret;
mutex_lock(&inode->ei_update_lock);
if (!insert) { if (!insert) {
i_size_write(&inode->v, new_size); i_size_write(&inode->v, new_size);
mutex_lock(&inode->ei_update_lock);
ret = bch2_write_inode_size(c, inode, new_size, ret = bch2_write_inode_size(c, inode, new_size,
ATTR_MTIME|ATTR_CTIME); ATTR_MTIME|ATTR_CTIME);
mutex_unlock(&inode->ei_update_lock); } else {
/* We need an inode update to update bi_journal_seq for fsync: */
ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
ATTR_MTIME|ATTR_CTIME);
} }
err: mutex_unlock(&inode->ei_update_lock);
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
inode_unlock(&inode->v);
return ret; return ret;
} }
...@@ -2814,6 +2814,17 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, ...@@ -2814,6 +2814,17 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
if (ret == -EINTR) if (ret == -EINTR)
ret = 0; ret = 0;
} }
if (ret == -ENOSPC && (mode & FALLOC_FL_ZERO_RANGE)) {
struct quota_res quota_res = { 0 };
s64 i_sectors_delta = 0;
bch2_fpunch_at(&trans, &iter, inode_inum(inode),
end_sector, &i_sectors_delta);
i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
bch2_quota_reservation_put(c, inode, &quota_res);
}
bch2_trans_iter_exit(&trans, &iter); bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans); bch2_trans_exit(&trans);
return ret; return ret;
...@@ -2822,77 +2833,58 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode, ...@@ -2822,77 +2833,58 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
static long bchfs_fallocate(struct bch_inode_info *inode, int mode, static long bchfs_fallocate(struct bch_inode_info *inode, int mode,
loff_t offset, loff_t len) loff_t offset, loff_t len)
{ {
struct address_space *mapping = inode->v.i_mapping;
struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_fs *c = inode->v.i_sb->s_fs_info;
loff_t end = offset + len; u64 end = offset + len;
loff_t block_start = round_down(offset, block_bytes(c)); u64 block_start = round_down(offset, block_bytes(c));
loff_t block_end = round_up(end, block_bytes(c)); u64 block_end = round_up(end, block_bytes(c));
int ret; bool truncated_last_page = false;
int ret, ret2 = 0;
inode_lock(&inode->v);
inode_dio_wait(&inode->v);
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) { if (!(mode & FALLOC_FL_KEEP_SIZE) && end > inode->v.i_size) {
ret = inode_newsize_ok(&inode->v, end); ret = inode_newsize_ok(&inode->v, end);
if (ret) if (ret)
goto err; return ret;
} }
if (mode & FALLOC_FL_ZERO_RANGE) { if (mode & FALLOC_FL_ZERO_RANGE) {
ret = __bch2_truncate_page(inode, ret = bch2_truncate_pages(inode, offset, end);
offset >> PAGE_SHIFT, if (unlikely(ret < 0))
offset, end); return ret;
if (!ret &&
offset >> PAGE_SHIFT != end >> PAGE_SHIFT)
ret = __bch2_truncate_page(inode,
end >> PAGE_SHIFT,
offset, end);
if (unlikely(ret)) truncated_last_page = ret;
goto err;
truncate_pagecache_range(&inode->v, offset, end - 1); truncate_pagecache_range(&inode->v, offset, end - 1);
block_start = round_up(offset, block_bytes(c));
block_end = round_down(end, block_bytes(c));
} }
ret = __bchfs_fallocate(inode, mode, block_start >> 9, block_end >> 9); ret = __bchfs_fallocate(inode, mode, block_start >> 9, block_end >> 9);
if (ret)
goto err;
/*
* Do we need to extend the file?
*
* If we zeroed up to the end of the file, we dropped whatever writes
* were going to write out the current i_size, so we have to extend
* manually even if FL_KEEP_SIZE was set:
*/
if (end >= inode->v.i_size &&
(!(mode & FALLOC_FL_KEEP_SIZE) ||
(mode & FALLOC_FL_ZERO_RANGE))) {
/* /*
* Sync existing appends before extending i_size, * On -ENOSPC in ZERO_RANGE mode, we still want to do the inode update,
* as in bch2_extend(): * so that the VFS cache i_size is consistent with the btree i_size:
*/ */
ret = filemap_write_and_wait_range(mapping, if (ret &&
inode->ei_inode.bi_size, S64_MAX); !(ret == -ENOSPC && (mode & FALLOC_FL_ZERO_RANGE)))
if (ret) return ret;
goto err;
if (mode & FALLOC_FL_KEEP_SIZE) if (mode & FALLOC_FL_KEEP_SIZE && end > inode->v.i_size)
end = inode->v.i_size; end = inode->v.i_size;
else
if (end >= inode->v.i_size &&
(((mode & FALLOC_FL_ZERO_RANGE) && !truncated_last_page) ||
!(mode & FALLOC_FL_KEEP_SIZE))) {
spin_lock(&inode->v.i_lock);
i_size_write(&inode->v, end); i_size_write(&inode->v, end);
spin_unlock(&inode->v.i_lock);
mutex_lock(&inode->ei_update_lock); mutex_lock(&inode->ei_update_lock);
ret = bch2_write_inode_size(c, inode, end, 0); ret2 = bch2_write_inode_size(c, inode, end, 0);
mutex_unlock(&inode->ei_update_lock); mutex_unlock(&inode->ei_update_lock);
} }
err:
bch2_pagecache_block_put(&inode->ei_pagecache_lock); return ret ?: ret2;
inode_unlock(&inode->v);
return ret;
} }
long bch2_fallocate_dispatch(struct file *file, int mode, long bch2_fallocate_dispatch(struct file *file, int mode,
...@@ -2905,6 +2897,10 @@ long bch2_fallocate_dispatch(struct file *file, int mode, ...@@ -2905,6 +2897,10 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
if (!percpu_ref_tryget(&c->writes)) if (!percpu_ref_tryget(&c->writes))
return -EROFS; return -EROFS;
inode_lock(&inode->v);
inode_dio_wait(&inode->v);
bch2_pagecache_block_get(&inode->ei_pagecache_lock);
if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE))) if (!(mode & ~(FALLOC_FL_KEEP_SIZE|FALLOC_FL_ZERO_RANGE)))
ret = bchfs_fallocate(inode, mode, offset, len); ret = bchfs_fallocate(inode, mode, offset, len);
else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE)) else if (mode == (FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE))
...@@ -2916,6 +2912,9 @@ long bch2_fallocate_dispatch(struct file *file, int mode, ...@@ -2916,6 +2912,9 @@ long bch2_fallocate_dispatch(struct file *file, int mode,
else else
ret = -EOPNOTSUPP; ret = -EOPNOTSUPP;
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
inode_unlock(&inode->v);
percpu_ref_put(&c->writes); percpu_ref_put(&c->writes);
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment