Commit 23e3337f authored by Filipe Manana, committed by David Sterba

btrfs: reset last_reflink_trans after fsyncing inode

When an inode has a last_reflink_trans matching the current transaction,
we have to take special care when logging its checksums in order to
avoid getting checksum items with overlapping ranges in a log tree,
which could result in missing checksums after log replay (more on that
in the changelogs of commit 40e046ac ("Btrfs: fix missing data
checksums after replaying a log tree") and commit e289f03e ("btrfs:
fix corrupt log due to concurrent fsync of inodes with shared extents")).
We also need to make sure a full fsync will copy all old file extent
items it finds in modified leaves, because they might have been copied
from some other inode.

However, once we fsync an inode, we don't need to keep paying the price of
that extra special care in future fsyncs done in the same transaction,
unless the inode is used for another reflink operation or the full sync
flag is set on it (truncate, failure to allocate extent maps for holes,
and other exceptional and infrequent cases).

So after we fsync an inode, reset its last_reflink_trans to zero. In case
another reflink happens, we continue to update the last_reflink_trans of
the inode, just as before. Also set last_reflink_trans to the generation
of the last transaction that modified the inode whenever we need to set
the full sync flag on the inode, just like when we need to load an inode
from disk after eviction.
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 96acb375
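
Before the diff itself, a minimal standalone sketch of the policy the message describes: a reflink records the transaction it ran in, the first full fsync in that transaction pays for the careful checksum logging and then clears last_reflink_trans, and later fsyncs in the same transaction skip the extra work. All types and names below (model_inode, model_reflink, and so on) are simplified stand-ins invented for illustration; only the last_reflink_trans policy mirrors the patch.

/*
 * Standalone illustration of the policy described above; not btrfs code.
 * All identifiers here are invented for the sketch, only the handling of
 * last_reflink_trans mirrors the patch.
 */
#include <stdbool.h>
#include <stdio.h>

struct model_inode {
	unsigned long long last_trans;         /* last transaction that modified the inode */
	unsigned long long last_reflink_trans; /* last transaction that reflinked into it */
};

/* A reflink (clone/dedupe) into the inode records the transaction it ran in. */
static void model_reflink(struct model_inode *dst, unsigned long long cur_trans)
{
	dst->last_reflink_trans = cur_trans;
	dst->last_trans = cur_trans;
}

/*
 * Logging needs the slower, overlap-safe checksum path only while the inode
 * was reflinked in the transaction being logged (or later).
 */
static bool model_needs_careful_csums(const struct model_inode *inode,
				      unsigned long long cur_trans)
{
	return inode->last_reflink_trans >= cur_trans;
}

/* A full fsync pays the price once, then clears last_reflink_trans. */
static void model_full_fsync(struct model_inode *inode, unsigned long long cur_trans)
{
	printf("fsync in transaction %llu: careful csum path = %d\n",
	       cur_trans, model_needs_careful_csums(inode, cur_trans));
	inode->last_reflink_trans = 0;
}

int main(void)
{
	struct model_inode ino = { 0, 0 };

	model_reflink(&ino, 100);    /* reflink during transaction 100 */
	model_full_fsync(&ino, 100); /* prints 1: extra care needed */
	model_full_fsync(&ino, 100); /* prints 0: already fsynced, cheap again */
	return 0;
}

Transaction number 100 above is arbitrary; the kernel compares against the running transaction's generation.
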
@@ -341,6 +341,36 @@ static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
 	spin_unlock(&inode->lock);
 }
 
+/*
+ * Should be called while holding the inode's VFS lock in exclusive mode or in a
+ * context where no one else can access the inode concurrently (during inode
+ * creation or when loading an inode from disk).
+ */
+static inline void btrfs_set_inode_full_sync(struct btrfs_inode *inode)
+{
+	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+	/*
+	 * The inode may have been part of a reflink operation in the last
+	 * transaction that modified it, and then a fsync has reset the
+	 * last_reflink_trans to avoid subsequent fsyncs in the same
+	 * transaction to do unnecessary work. So update last_reflink_trans
+	 * to the last_trans value (we have to be pessimistic and assume a
+	 * reflink happened).
+	 *
+	 * The ->last_trans is protected by the inode's spinlock and we can
+	 * have a concurrent ordered extent completion update it. Also set
+	 * last_reflink_trans to ->last_trans only if the former is less than
+	 * the later, because we can be called in a context where
+	 * last_reflink_trans was set to the current transaction generation
+	 * while ->last_trans was not yet updated in the current transaction,
+	 * and therefore has a lower value.
+	 */
+	spin_lock(&inode->lock);
+	if (inode->last_reflink_trans < inode->last_trans)
+		inode->last_reflink_trans = inode->last_trans;
+	spin_unlock(&inode->lock);
+}
+
 static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
 {
 	bool ret = false;
...
@@ -2514,7 +2514,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
 	hole_em = alloc_extent_map();
 	if (!hole_em) {
 		btrfs_drop_extent_cache(inode, offset, end - 1, 0);
-		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+		btrfs_set_inode_full_sync(inode);
 	} else {
 		hole_em->start = offset;
 		hole_em->len = end - offset;
@@ -2535,8 +2535,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
 		} while (ret == -EEXIST);
 		free_extent_map(hole_em);
 		if (ret)
-			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-				&inode->runtime_flags);
+			btrfs_set_inode_full_sync(inode);
 	}
 
 	return 0;
@@ -2890,7 +2889,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
 	 * maps for the replacement extents (or holes).
 	 */
 	if (extent_info && !extent_info->is_new_extent)
-		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+		btrfs_set_inode_full_sync(inode);
 
 	if (ret)
 		goto out_trans;
...
@@ -418,7 +418,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 size,
 		goto out;
 	}
 
-	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+	btrfs_set_inode_full_sync(inode);
 out:
 	/*
 	 * Don't forget to free the reserved space, as for inlined extent
@@ -4911,8 +4911,7 @@ int btrfs_cont_expand(struct btrfs_inode *inode, loff_t oldsize, loff_t size)
 					cur_offset + hole_size - 1, 0);
 			hole_em = alloc_extent_map();
 			if (!hole_em) {
-				set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-					&inode->runtime_flags);
+				btrfs_set_inode_full_sync(inode);
 				goto next;
 			}
 			hole_em->start = cur_offset;
@@ -6165,7 +6164,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	 * sync since it will be a full sync anyway and this will blow away the
 	 * old info in the log.
 	 */
-	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
+	btrfs_set_inode_full_sync(BTRFS_I(inode));
 
 	key[0].objectid = objectid;
 	key[0].type = BTRFS_INODE_ITEM_KEY;
@@ -8767,7 +8766,7 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
 	 * extents beyond i_size to drop.
 	 */
 	if (control.extents_found > 0)
-		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
+		btrfs_set_inode_full_sync(BTRFS_I(inode));
 
 	return ret;
 }
@@ -9975,8 +9974,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 
 		em = alloc_extent_map();
 		if (!em) {
-			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-				&BTRFS_I(inode)->runtime_flags);
+			btrfs_set_inode_full_sync(BTRFS_I(inode));
 			goto next;
 		}
 
...
@@ -277,7 +277,7 @@ static int clone_copy_inline_extent(struct inode *dst,
 			    path->slots[0]),
 			    size);
 	btrfs_update_inode_bytes(BTRFS_I(dst), datal, drop_args.bytes_found);
-	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags);
+	btrfs_set_inode_full_sync(BTRFS_I(dst));
 	ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end);
 out:
 	if (!ret && !trans) {
@@ -580,8 +580,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 		 * replaced file extent items.
 		 */
 		if (last_dest_end >= i_size_read(inode))
-			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-				&BTRFS_I(inode)->runtime_flags);
+			btrfs_set_inode_full_sync(BTRFS_I(inode));
 
 		ret = btrfs_replace_file_extents(BTRFS_I(inode), path,
 				last_dest_end, destoff + len - 1, NULL, &trans);
...
@@ -6013,6 +6013,14 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	if (inode_only != LOG_INODE_EXISTS)
 		inode->last_log_commit = inode->last_sub_trans;
 	spin_unlock(&inode->lock);
+
+	/*
+	 * Reset the last_reflink_trans so that the next fsync does not need to
+	 * go through the slower path when logging extents and their checksums.
+	 */
+	if (inode_only == LOG_INODE_ALL)
+		inode->last_reflink_trans = 0;
+
 out_unlock:
 	mutex_unlock(&inode->log_mutex);
 out:
...
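
As a follow-up illustration of the new helper's design choice, here is a small self-contained sketch of the rule btrfs_set_inode_full_sync() applies to last_reflink_trans. The struct and function names are again invented for the example; the real helper also sets BTRFS_INODE_NEEDS_FULL_SYNC and takes the inode's spinlock, which the sketch omits.

/*
 * Illustrative only, not the kernel code: the "never lower the value" rule
 * that btrfs_set_inode_full_sync() applies to last_reflink_trans, modeled
 * on a simplified inode.
 */
#include <assert.h>

struct model_inode {
	unsigned long long last_trans;         /* last transaction that modified the inode */
	unsigned long long last_reflink_trans; /* last transaction that reflinked into it */
};

static void model_set_full_sync(struct model_inode *inode)
{
	/*
	 * Be pessimistic and assume the last modifying transaction also
	 * reflinked into the inode, but never lower last_reflink_trans:
	 * it may already hold the current transaction generation while
	 * last_trans still lags behind.
	 */
	if (inode->last_reflink_trans < inode->last_trans)
		inode->last_reflink_trans = inode->last_trans;
}

int main(void)
{
	/* Case 1: a previous fsync cleared last_reflink_trans, so bump it. */
	struct model_inode a = { .last_trans = 100, .last_reflink_trans = 0 };
	model_set_full_sync(&a);
	assert(a.last_reflink_trans == 100);

	/* Case 2: last_reflink_trans is already ahead of last_trans, keep it. */
	struct model_inode b = { .last_trans = 100, .last_reflink_trans = 101 };
	model_set_full_sync(&b);
	assert(b.last_reflink_trans == 101);

	return 0;
}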