Commit 6d4572a9 authored by Qu Wenruo, committed by David Sterba

btrfs: allow btrfs_truncate_block() to fallback to nocow for data space reservation

[BUG]
When the data space is exhausted, even if the inode has the NOCOW attribute,
we still refuse to truncate an unaligned range due to ENOSPC.

The following script can reproduce it pretty easily:
  #!/bin/bash

  dev=/dev/test/test
  mnt=/mnt/btrfs

  umount $dev &> /dev/null
  umount $mnt &> /dev/null

  mkfs.btrfs -f $dev -b 1G
  mount -o nospace_cache $dev $mnt
  touch $mnt/foobar
  chattr +C $mnt/foobar

  xfs_io -f -c "pwrite -b 4k 0 4k" $mnt/foobar > /dev/null
  xfs_io -f -c "pwrite -b 4k 0 1G" $mnt/padding &> /dev/null
  sync

  xfs_io -c "fpunch 0 2k" $mnt/foobar
  umount $mnt

Currently this will fail at the fpunch part.

[CAUSE]
This happens because btrfs_truncate_block() always reserves data space
without checking the NOCOW attribute.

Since the writeback path already follows the NOCOW bit, we only need to fix
the space reservation code in btrfs_truncate_block().

[FIX]
Make btrfs_truncate_block() follow btrfs_buffered_write() to try to
reserve data space first, and fall back to NOCOW check only when we
don't have enough space.

This always-try-to-reserve-first approach is an optimization introduced in
btrfs_buffered_write() to avoid the expensive btrfs_check_can_nocow() call.

This patch will export check_can_nocow() as btrfs_check_can_nocow(), and
use it in btrfs_truncate_block() to fix the problem.
Reported-by: Martin Doucha <martin.doucha@suse.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent b547a88e
...@@ -3033,6 +3033,8 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages, ...@@ -3033,6 +3033,8 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
size_t num_pages, loff_t pos, size_t write_bytes, size_t num_pages, loff_t pos, size_t write_bytes,
struct extent_state **cached); struct extent_state **cached);
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end); int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
int btrfs_check_can_nocow(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes, bool nowait);
/* tree-defrag.c */ /* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
......
...@@ -1533,7 +1533,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, ...@@ -1533,7 +1533,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
return ret; return ret;
} }
static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos, int btrfs_check_can_nocow(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes, bool nowait) size_t *write_bytes, bool nowait)
{ {
struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_fs_info *fs_info = inode->root->fs_info;
...@@ -1649,7 +1649,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb, ...@@ -1649,7 +1649,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
if (ret < 0) { if (ret < 0) {
if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC)) && BTRFS_INODE_PREALLOC)) &&
check_can_nocow(BTRFS_I(inode), pos, btrfs_check_can_nocow(BTRFS_I(inode), pos,
&write_bytes, false) > 0) { &write_bytes, false) > 0) {
/* /*
* For nodata cow case, no need to reserve * For nodata cow case, no need to reserve
...@@ -1927,7 +1927,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, ...@@ -1927,7 +1927,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
*/ */
if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC)) || BTRFS_INODE_PREALLOC)) ||
check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes, btrfs_check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes,
true) <= 0) { true) <= 0) {
inode_unlock(inode); inode_unlock(inode);
return -EAGAIN; return -EAGAIN;
......
...@@ -4512,11 +4512,13 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, ...@@ -4512,11 +4512,13 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
struct extent_state *cached_state = NULL; struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL; struct extent_changeset *data_reserved = NULL;
char *kaddr; char *kaddr;
bool only_release_metadata = false;
u32 blocksize = fs_info->sectorsize; u32 blocksize = fs_info->sectorsize;
pgoff_t index = from >> PAGE_SHIFT; pgoff_t index = from >> PAGE_SHIFT;
unsigned offset = from & (blocksize - 1); unsigned offset = from & (blocksize - 1);
struct page *page; struct page *page;
gfp_t mask = btrfs_alloc_write_mask(mapping); gfp_t mask = btrfs_alloc_write_mask(mapping);
size_t write_bytes = blocksize;
int ret = 0; int ret = 0;
u64 block_start; u64 block_start;
u64 block_end; u64 block_end;
...@@ -4528,11 +4530,27 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, ...@@ -4528,11 +4530,27 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
block_start = round_down(from, blocksize); block_start = round_down(from, blocksize);
block_end = block_start + blocksize - 1; block_end = block_start + blocksize - 1;
ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
ret = btrfs_check_data_free_space(inode, &data_reserved, block_start,
blocksize);
if (ret < 0) {
if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
BTRFS_INODE_PREALLOC)) &&
btrfs_check_can_nocow(BTRFS_I(inode), block_start,
&write_bytes, false) > 0) {
/* For nocow case, no need to reserve data space */
only_release_metadata = true;
} else {
goto out;
}
}
ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), blocksize);
if (ret < 0) {
if (!only_release_metadata)
btrfs_free_reserved_data_space(inode, data_reserved,
block_start, blocksize); block_start, blocksize);
if (ret)
goto out; goto out;
}
again: again:
page = find_or_create_page(mapping, index, mask); page = find_or_create_page(mapping, index, mask);
if (!page) { if (!page) {
...@@ -4601,14 +4619,26 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, ...@@ -4601,14 +4619,26 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
set_page_dirty(page); set_page_dirty(page);
unlock_extent_cached(io_tree, block_start, block_end, &cached_state); unlock_extent_cached(io_tree, block_start, block_end, &cached_state);
if (only_release_metadata)
set_extent_bit(&BTRFS_I(inode)->io_tree, block_start,
block_end, EXTENT_NORESERVE, NULL, NULL,
GFP_NOFS);
out_unlock: out_unlock:
if (ret) if (ret) {
btrfs_delalloc_release_space(inode, data_reserved, block_start, if (only_release_metadata)
btrfs_delalloc_release_metadata(BTRFS_I(inode),
blocksize, true); blocksize, true);
else
btrfs_delalloc_release_space(inode, data_reserved,
block_start, blocksize, true);
}
btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize);
unlock_page(page); unlock_page(page);
put_page(page); put_page(page);
out: out:
if (only_release_metadata)
btrfs_drew_write_unlock(&BTRFS_I(inode)->root->snapshot_lock);
extent_changeset_free(data_reserved); extent_changeset_free(data_reserved);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment