Commit f0fddcec authored by Linus Torvalds

Merge tag 'for-5.14-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "A few fixes and one patch to help some block layer API cleanups:

   - skip missing device when running fstrim

   - fix unpersisted i_size on fsync after expanding truncate

   - fix lock inversion problem when doing qgroup extent tracing

   - replace bdgrab/bdput usage, replace gendisk by block_device"

* tag 'for-5.14-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: store a block_device in struct btrfs_ordered_extent
  btrfs: fix lock inversion problem when doing qgroup extent tracing
  btrfs: check for missing device in btrfs_trim_fs
  btrfs: fix unpersisted i_size on fsync after expanding truncate
parents 704f4cba c7c3a6dc
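The qgroup fix in this pull centers on a lock-ordering rule: fs_info->commit_root_sem is a higher-level lock that must be taken before any extent buffer locks, while the old qgroup tracing path could end up taking them in the opposite order. As a minimal, self-contained sketch of that ABBA failure mode, using plain POSIX mutexes rather than the real btrfs locks (all names below are illustrative stand-ins, not kernel APIs):

#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-ins for commit_root_sem and an extent buffer lock. */
static pthread_mutex_t commit_root_sem = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t extent_buffer_lock = PTHREAD_MUTEX_INITIALIZER;

/* Correct order: the high-level lock first, then the extent buffer. */
static void *backref_walker(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&commit_root_sem);
        pthread_mutex_lock(&extent_buffer_lock);
        pthread_mutex_unlock(&extent_buffer_lock);
        pthread_mutex_unlock(&commit_root_sem);
        return NULL;
}

/*
 * Inverted order, analogous to holding an extent buffer write lock from
 * btrfs_truncate_inode_items() and then letting the backref walk try to
 * take commit_root_sem: combined with the thread above this is the
 * classic ABBA pattern and can deadlock.
 */
static void *truncate_path(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&extent_buffer_lock);
        pthread_mutex_lock(&commit_root_sem);
        pthread_mutex_unlock(&commit_root_sem);
        pthread_mutex_unlock(&extent_buffer_lock);
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, backref_walker, NULL);
        pthread_create(&b, NULL, truncate_path, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        printf("finished (the two threads may deadlock before this line)\n");
        return 0;
}

The actual fix below avoids the inversion differently: qgroup tracing tells btrfs_find_all_roots() to skip taking commit_root_sem entirely while a transaction handle is already held.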
@@ -1488,15 +1488,15 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
                          struct btrfs_fs_info *fs_info, u64 bytenr,
                          u64 time_seq, struct ulist **roots,
-                         bool ignore_offset)
+                         bool ignore_offset, bool skip_commit_root_sem)
 {
         int ret;
-        if (!trans)
+        if (!trans && !skip_commit_root_sem)
                 down_read(&fs_info->commit_root_sem);
         ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
                                         time_seq, roots, ignore_offset);
-        if (!trans)
+        if (!trans && !skip_commit_root_sem)
                 up_read(&fs_info->commit_root_sem);
         return ret;
 }
......
@@ -47,7 +47,8 @@ int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
                          const u64 *extent_item_pos, bool ignore_offset);
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
                          struct btrfs_fs_info *fs_info, u64 bytenr,
-                         u64 time_seq, struct ulist **roots, bool ignore_offset);
+                         u64 time_seq, struct ulist **roots, bool ignore_offset,
+                         bool skip_commit_root_sem);
 char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
                         u32 name_len, unsigned long name_off,
                         struct extent_buffer *eb_in, u64 parent,
......
@@ -974,7 +974,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
         kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
         if (qrecord_inserted)
-                btrfs_qgroup_trace_extent_post(fs_info, record);
+                btrfs_qgroup_trace_extent_post(trans, record);
         return 0;
 }
@@ -1069,7 +1069,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
         if (qrecord_inserted)
-                return btrfs_qgroup_trace_extent_post(fs_info, record);
+                return btrfs_qgroup_trace_extent_post(trans, record);
         return 0;
 }
......
@@ -6019,6 +6019,9 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
         mutex_lock(&fs_info->fs_devices->device_list_mutex);
         devices = &fs_info->fs_devices->devices;
         list_for_each_entry(device, devices, dev_list) {
+                if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
+                        continue;
+
                 ret = btrfs_trim_free_extents(device, &group_trimmed);
                 if (ret) {
                         dev_failed++;
......
@@ -2992,7 +2992,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                 goto out;
         }
-        if (ordered_extent->disk)
+        if (ordered_extent->bdev)
                 btrfs_rewrite_logical_zoned(ordered_extent);
         btrfs_free_io_failure_record(inode, start, end);
......
@@ -190,8 +190,6 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
         entry->truncated_len = (u64)-1;
         entry->qgroup_rsv = ret;
         entry->physical = (u64)-1;
-        entry->disk = NULL;
-        entry->partno = (u8)-1;
         ASSERT(type == BTRFS_ORDERED_REGULAR ||
                type == BTRFS_ORDERED_NOCOW ||
......
@@ -145,8 +145,7 @@ struct btrfs_ordered_extent {
          * command in a workqueue context
          */
         u64 physical;
-        struct gendisk *disk;
-        u8 partno;
+        struct block_device *bdev;
 };
 /*
......
@@ -1704,17 +1704,39 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
         return 0;
 }
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
                                    struct btrfs_qgroup_extent_record *qrecord)
 {
         struct ulist *old_root;
         u64 bytenr = qrecord->bytenr;
         int ret;
-        ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false);
+        /*
+         * We are always called in a context where we are already holding a
+         * transaction handle. Often we are called when adding a data delayed
+         * reference from btrfs_truncate_inode_items() (truncating or unlinking),
+         * in which case we will be holding a write lock on an extent buffer from
+         * a subvolume tree. In this case we can't allow btrfs_find_all_roots() to
+         * acquire fs_info->commit_root_sem, because that is a higher level lock
+         * that must be acquired before locking any extent buffers.
+         *
+         * So we want btrfs_find_all_roots() to not acquire the commit_root_sem
+         * but we can't pass it a non-NULL transaction handle, because otherwise
+         * it would not use commit roots and would lock extent buffers, causing
+         * a deadlock if it ends up trying to read lock the same extent buffer
+         * that was previously write locked at btrfs_truncate_inode_items().
+         *
+         * So pass a NULL transaction handle to btrfs_find_all_roots() and
+         * explicitly tell it to not acquire the commit_root_sem - if we are
+         * holding a transaction handle we don't need its protection.
+         */
+        ASSERT(trans != NULL);
+
+        ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root,
+                                   false, true);
         if (ret < 0) {
-                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
-                btrfs_warn(fs_info,
+                trans->fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+                btrfs_warn(trans->fs_info,
 "error accounting new delayed refs extent (err code: %d), quota inconsistent",
                         ret);
                 return 0;
@@ -1758,7 +1780,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
                 kfree(record);
                 return 0;
         }
-        return btrfs_qgroup_trace_extent_post(fs_info, record);
+        return btrfs_qgroup_trace_extent_post(trans, record);
 }
 int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
@@ -2629,7 +2651,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
                                 /* Search commit root to find old_roots */
                                 ret = btrfs_find_all_roots(NULL, fs_info,
                                                 record->bytenr, 0,
-                                                &record->old_roots, false);
+                                                &record->old_roots, false, false);
                                 if (ret < 0)
                                         goto cleanup;
                         }
@@ -2645,7 +2667,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
                          * current root. It's safe inside commit_transaction().
                          */
                         ret = btrfs_find_all_roots(trans, fs_info,
-                                record->bytenr, BTRFS_SEQ_LAST, &new_roots, false);
+                           record->bytenr, BTRFS_SEQ_LAST, &new_roots, false, false);
                         if (ret < 0)
                                 goto cleanup;
                         if (qgroup_to_skip) {
@@ -3179,7 +3201,7 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
                 num_bytes = found.offset;
                 ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
-                                           &roots, false);
+                                           &roots, false, false);
                 if (ret < 0)
                         goto out;
                 /* For rescan, just pass old_roots as NULL */
......
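The new skip_commit_root_sem parameter is an instance of a common conditional-locking pattern: a helper that normally takes a lock on behalf of its caller grows a flag meaning "the caller already provides the needed protection, do not acquire the lock here". A rough userspace sketch of that shape, with made-up names (this is not the btrfs API, only an illustration of the pattern):

#include <pthread.h>
#include <stdbool.h>

static pthread_rwlock_t commit_root_sem = PTHREAD_RWLOCK_INITIALIZER;
static int shared_state;

/*
 * Normally this helper takes the read lock itself. A caller that already
 * holds an equivalent guarantee (here: some outer, transaction-like
 * context) passes skip_lock = true so the helper does not touch the lock
 * and therefore cannot take part in a lock-order inversion.
 */
static int lookup_roots(bool skip_lock)
{
        int val;

        if (!skip_lock)
                pthread_rwlock_rdlock(&commit_root_sem);
        val = shared_state;     /* the work the lock would protect */
        if (!skip_lock)
                pthread_rwlock_unlock(&commit_root_sem);
        return val;
}

int main(void)
{
        shared_state = 42;
        /* Same result whether the helper or the caller owns the locking. */
        return lookup_roots(false) == lookup_roots(true) ? 0 : 1;
}

In the hunks above, btrfs_qgroup_trace_extent_post() uses the same shape: it passes a NULL transaction handle so commit roots are still used, plus skip_commit_root_sem=true so the semaphore is never taken while extent buffers are write-locked.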
@@ -298,7 +298,7 @@ int btrfs_qgroup_trace_extent_nolock(
  * using current root, then we can move all expensive backref walk out of
  * transaction committing, but not now as qgroup accounting will be wrong again.
  */
-int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info,
+int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
                                    struct btrfs_qgroup_extent_record *qrecord);
 /*
......
@@ -224,7 +224,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
          * quota.
          */
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                                   false);
+                                   false, false);
         if (ret) {
                 ulist_free(old_roots);
                 test_err("couldn't find old roots: %d", ret);
@@ -237,7 +237,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
                 return ret;
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                                   false);
+                                   false, false);
         if (ret) {
                 ulist_free(old_roots);
                 ulist_free(new_roots);
@@ -261,7 +261,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
         new_roots = NULL;
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                                   false);
+                                   false, false);
         if (ret) {
                 ulist_free(old_roots);
                 test_err("couldn't find old roots: %d", ret);
@@ -273,7 +273,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root,
                 return -EINVAL;
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                                   false);
+                                   false, false);
         if (ret) {
                 ulist_free(old_roots);
                 ulist_free(new_roots);
@@ -325,7 +325,7 @@ static int test_multiple_refs(struct btrfs_root *root,
         }
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                                   false);
+                                   false, false);
         if (ret) {
                 ulist_free(old_roots);
                 test_err("couldn't find old roots: %d", ret);
@@ -338,7 +338,7 @@ static int test_multiple_refs(struct btrfs_root *root,
                 return ret;
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                                   false);
+                                   false, false);
         if (ret) {
                 ulist_free(old_roots);
                 ulist_free(new_roots);
@@ -360,7 +360,7 @@ static int test_multiple_refs(struct btrfs_root *root,
         }
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                                   false);
+                                   false, false);
         if (ret) {
                 ulist_free(old_roots);
                 test_err("couldn't find old roots: %d", ret);
@@ -373,7 +373,7 @@ static int test_multiple_refs(struct btrfs_root *root,
                 return ret;
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                                   false);
+                                   false, false);
         if (ret) {
                 ulist_free(old_roots);
                 ulist_free(new_roots);
@@ -401,7 +401,7 @@ static int test_multiple_refs(struct btrfs_root *root,
         }
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots,
-                                   false);
+                                   false, false);
         if (ret) {
                 ulist_free(old_roots);
                 test_err("couldn't find old roots: %d", ret);
@@ -414,7 +414,7 @@ static int test_multiple_refs(struct btrfs_root *root,
                 return ret;
         ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots,
-                                   false);
+                                   false, false);
         if (ret) {
                 ulist_free(old_roots);
                 ulist_free(new_roots);
......
@@ -5526,16 +5526,29 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                 spin_lock(&inode->lock);
                 inode->logged_trans = trans->transid;
                 /*
-                 * Don't update last_log_commit if we logged that an inode exists
-                 * after it was loaded to memory (full_sync bit set).
-                 * This is to prevent data loss when we do a write to the inode,
-                 * then the inode gets evicted after all delalloc was flushed,
-                 * then we log it exists (due to a rename for example) and then
-                 * fsync it. This last fsync would do nothing (not logging the
-                 * extents previously written).
+                 * Don't update last_log_commit if we logged that an inode exists.
+                 * We do this for two reasons:
+                 *
+                 * 1) We might have had buffered writes to this inode that were
+                 *    flushed and had their ordered extents completed in this
+                 *    transaction, but we did not previously log the inode with
+                 *    LOG_INODE_ALL. Later the inode was evicted and after that
+                 *    it was loaded again and this LOG_INODE_EXISTS log operation
+                 *    happened. We must make sure that if an explicit fsync against
+                 *    the inode is performed later, it logs the new extents, an
+                 *    updated inode item, etc, and syncs the log. The same logic
+                 *    applies to direct IO writes instead of buffered writes.
+                 *
+                 * 2) When we log the inode with LOG_INODE_EXISTS, its inode item
+                 *    is logged with an i_size of 0 or whatever value was logged
+                 *    before. If later the i_size of the inode is increased by a
+                 *    truncate operation, the log is synced through an fsync of
+                 *    some other inode and then finally an explicit fsync against
+                 *    this inode is made, we must make sure this fsync logs the
+                 *    inode with the new i_size, the hole between old i_size and
+                 *    the new i_size, and syncs the log.
                  */
-                if (inode_only != LOG_INODE_EXISTS ||
-                    !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags))
+                if (inode_only != LOG_INODE_EXISTS)
                         inode->last_log_commit = inode->last_sub_trans;
                 spin_unlock(&inode->lock);
         }
......
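The tree-log change above is about a user-visible guarantee: if an inode was logged only as "exists" (LOG_INODE_EXISTS, for example because of a rename or link in the same transaction) and its i_size is then grown by a truncate, a later explicit fsync of that inode must still persist the new size. The sketch below only shows the core write / expanding-truncate / fsync sequence whose durability is at stake; it is a simplified illustration, not the original reproducer, and /mnt/btrfs/foo is just an example path on a mounted btrfs filesystem:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        char buf[4096];
        int fd;

        memset(buf, 0xab, sizeof(buf));

        /* Example path; any file on a btrfs mount would do. */
        fd = open("/mnt/btrfs/foo", O_CREAT | O_RDWR | O_TRUNC, 0644);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* Write some data and make sure it reaches disk. */
        if (write(fd, buf, sizeof(buf)) != (ssize_t)sizeof(buf) || fsync(fd) != 0) {
                perror("write/fsync");
                return 1;
        }

        /* Grow the file with an expanding truncate... */
        if (ftruncate(fd, 1024 * 1024) != 0) {
                perror("ftruncate");
                return 1;
        }

        /*
         * ...and fsync again. After this fsync returns, the file must be
         * 1 MiB long even across a crash or power failure. The bug fixed
         * above could leave the old i_size in the log when the inode had
         * previously been logged with LOG_INODE_EXISTS.
         */
        if (fsync(fd) != 0) {
                perror("fsync");
                return 1;
        }

        close(fd);
        return 0;
}

With the fix, that second fsync logs the inode with the new i_size and the hole between the old and new size, as the updated comment in the hunk spells out.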
@@ -1349,8 +1349,7 @@ void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset,
                 return;
         ordered->physical = physical;
-        ordered->disk = bio->bi_bdev->bd_disk;
-        ordered->partno = bio->bi_bdev->bd_partno;
+        ordered->bdev = bio->bi_bdev;
         btrfs_put_ordered_extent(ordered);
 }
@@ -1362,18 +1361,16 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
         struct extent_map_tree *em_tree;
         struct extent_map *em;
         struct btrfs_ordered_sum *sum;
-        struct block_device *bdev;
         u64 orig_logical = ordered->disk_bytenr;
         u64 *logical = NULL;
         int nr, stripe_len;
         /* Zoned devices should not have partitions. So, we can assume it is 0 */
-        ASSERT(ordered->partno == 0);
-        bdev = bdgrab(ordered->disk->part0);
-        if (WARN_ON(!bdev))
+        ASSERT(!bdev_is_partition(ordered->bdev));
+        if (WARN_ON(!ordered->bdev))
                 return;
-        if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, bdev,
+        if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, ordered->bdev,
                                      ordered->physical, &logical, &nr,
                                      &stripe_len)))
                 goto out;
@@ -1402,7 +1399,6 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
 out:
         kfree(logical);
-        bdput(bdev);
 }
 bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
......