Commit 07978330 authored by Linus Torvalds

Merge tag 'for-6.10-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - fix handling of folio private changes.

   The private value holds a pointer to our extent buffer structure
   representing a metadata range. Release and creation of the range were
   not properly synchronized when updating the private bit, which ended
   up in a double folio_put, leading to all sorts of breakage

 - fix a crash, reported as a duplicate key in metadata, but caused by a
   race between fsync and a size-extending write. Requires a prealloc
   target range + fsync and other conditions (log tree state, timing)

 - fix leak of qgroup extent records after transaction abort

* tag 'for-6.10-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: protect folio::private when attaching extent buffer folios
  btrfs: fix leak of qgroup extent records after transaction abort
  btrfs: fix crash on racing fsync and size-extending write into prealloc
parents eecba7c0 f3a5367c
fs/btrfs/disk-io.c
@@ -4538,18 +4538,10 @@ static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 				       struct btrfs_fs_info *fs_info)
 {
 	struct rb_node *node;
-	struct btrfs_delayed_ref_root *delayed_refs;
+	struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs;
 	struct btrfs_delayed_ref_node *ref;
 
-	delayed_refs = &trans->delayed_refs;
-
 	spin_lock(&delayed_refs->lock);
-	if (atomic_read(&delayed_refs->num_entries) == 0) {
-		spin_unlock(&delayed_refs->lock);
-		btrfs_debug(fs_info, "delayed_refs has NO entry");
-		return;
-	}
-
 	while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
 		struct btrfs_delayed_ref_head *head;
 		struct rb_node *n;
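The hunk above is the qgroup-records fix: btrfs_destroy_delayed_refs() no longer returns early when there are no delayed ref head entries left, so the abort-path cleanup further down in the function always runs. As a generic illustration of why such an early return can leak, here is a minimal, self-contained userspace sketch (invented names, not btrfs code):

/*
 * Standalone sketch with invented names (not btrfs code): bailing out early
 * because one queue is empty also skips cleanup that must run unconditionally,
 * leaking the separately allocated records.
 */
#include <stdio.h>
#include <stdlib.h>

struct pending_state {
	int nr_refs;	/* stands in for the delayed ref head entries */
	void *records;	/* stands in for the qgroup extent records */
};

static void destroy_pending_state(struct pending_state *s)
{
	/*
	 * The buggy variant had the equivalent of:
	 *
	 *	if (s->nr_refs == 0)
	 *		return;
	 *
	 * here, which leaked s->records whenever the queue was already empty.
	 */
	while (s->nr_refs > 0)
		s->nr_refs--;		/* drop the queued refs */

	free(s->records);		/* must run even if nr_refs was 0 */
	s->records = NULL;
}

int main(void)
{
	struct pending_state s = { .nr_refs = 0, .records = malloc(128) };

	destroy_pending_state(&s);
	printf("records freed: %s\n", s.records ? "no" : "yes");
	return 0;
}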
fs/btrfs/extent_io.c
@@ -3689,6 +3689,8 @@ static struct extent_buffer *grab_extent_buffer(
 	struct folio *folio = page_folio(page);
 	struct extent_buffer *exists;
 
+	lockdep_assert_held(&page->mapping->i_private_lock);
+
 	/*
 	 * For subpage case, we completely rely on radix tree to ensure we
 	 * don't try to insert two ebs for the same bytenr. So here we always
@@ -3756,13 +3758,14 @@ static int check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start)
  * The caller needs to free the existing folios and retry using the same order.
  */
 static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
+				      struct btrfs_subpage *prealloc,
 				      struct extent_buffer **found_eb_ret)
 {
 	struct btrfs_fs_info *fs_info = eb->fs_info;
 	struct address_space *mapping = fs_info->btree_inode->i_mapping;
 	const unsigned long index = eb->start >> PAGE_SHIFT;
-	struct folio *existing_folio;
+	struct folio *existing_folio = NULL;
 	int ret;
 
 	ASSERT(found_eb_ret);
@@ -3774,12 +3777,14 @@ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
 	ret = filemap_add_folio(mapping, eb->folios[i], index + i,
 				GFP_NOFS | __GFP_NOFAIL);
 	if (!ret)
-		return 0;
+		goto finish;
 
 	existing_folio = filemap_lock_folio(mapping, index + i);
 	/* The page cache only exists for a very short time, just retry. */
-	if (IS_ERR(existing_folio))
+	if (IS_ERR(existing_folio)) {
+		existing_folio = NULL;
 		goto retry;
+	}
 
 	/* For now, we should only have single-page folios for btree inode. */
 	ASSERT(folio_nr_pages(existing_folio) == 1);
@@ -3790,14 +3795,13 @@ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
 		return -EAGAIN;
 	}
 
-	if (fs_info->nodesize < PAGE_SIZE) {
-		/*
-		 * We're going to reuse the existing page, can drop our page
-		 * and subpage structure now.
-		 */
+finish:
+	spin_lock(&mapping->i_private_lock);
+	if (existing_folio && fs_info->nodesize < PAGE_SIZE) {
+		/* We're going to reuse the existing page, can drop our folio now. */
 		__free_page(folio_page(eb->folios[i], 0));
 		eb->folios[i] = existing_folio;
-	} else {
+	} else if (existing_folio) {
 		struct extent_buffer *existing_eb;
 
 		existing_eb = grab_extent_buffer(fs_info,
@@ -3805,6 +3809,7 @@ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
 		if (existing_eb) {
 			/* The extent buffer still exists, we can use it directly. */
 			*found_eb_ret = existing_eb;
+			spin_unlock(&mapping->i_private_lock);
 			folio_unlock(existing_folio);
 			folio_put(existing_folio);
 			return 1;
@@ -3813,6 +3818,22 @@ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i,
 		__free_page(folio_page(eb->folios[i], 0));
 		eb->folios[i] = existing_folio;
 	}
+	eb->folio_size = folio_size(eb->folios[i]);
+	eb->folio_shift = folio_shift(eb->folios[i]);
+	/* Should not fail, as we have preallocated the memory. */
+	ret = attach_extent_buffer_folio(eb, eb->folios[i], prealloc);
+	ASSERT(!ret);
+	/*
+	 * To inform we have an extra eb under allocation, so that
+	 * detach_extent_buffer_page() won't release the folio private when the
+	 * eb hasn't been inserted into radix tree yet.
+	 *
+	 * The ref will be decreased when the eb releases the page, in
+	 * detach_extent_buffer_page(). Thus needs no special handling in the
+	 * error path.
+	 */
+	btrfs_folio_inc_eb_refs(fs_info, eb->folios[i]);
+	spin_unlock(&mapping->i_private_lock);
 
 	return 0;
 }
@@ -3824,7 +3845,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	int attached = 0;
 	struct extent_buffer *eb;
 	struct extent_buffer *existing_eb = NULL;
-	struct address_space *mapping = fs_info->btree_inode->i_mapping;
 	struct btrfs_subpage *prealloc = NULL;
 	u64 lockdep_owner = owner_root;
 	bool page_contig = true;
@@ -3890,7 +3910,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	for (int i = 0; i < num_folios; i++) {
 		struct folio *folio;
 
-		ret = attach_eb_folio_to_filemap(eb, i, &existing_eb);
+		ret = attach_eb_folio_to_filemap(eb, i, prealloc, &existing_eb);
 		if (ret > 0) {
 			ASSERT(existing_eb);
 			goto out;
@@ -3927,24 +3947,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 		 * and free the allocated page.
 		 */
 		folio = eb->folios[i];
-		eb->folio_size = folio_size(folio);
-		eb->folio_shift = folio_shift(folio);
-		spin_lock(&mapping->i_private_lock);
-		/* Should not fail, as we have preallocated the memory */
-		ret = attach_extent_buffer_folio(eb, folio, prealloc);
-		ASSERT(!ret);
-		/*
-		 * To inform we have extra eb under allocation, so that
-		 * detach_extent_buffer_page() won't release the folio private
-		 * when the eb hasn't yet been inserted into radix tree.
-		 *
-		 * The ref will be decreased when the eb released the page, in
-		 * detach_extent_buffer_page().
-		 * Thus needs no special handling in error path.
-		 */
-		btrfs_folio_inc_eb_refs(fs_info, folio);
-		spin_unlock(&mapping->i_private_lock);
-
 		WARN_ON(btrfs_folio_test_dirty(fs_info, folio, eb->start, eb->len));
 		/*
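The hunks above serialize attaching the folio private value with the release side: attach_extent_buffer_folio() and the btrfs_folio_inc_eb_refs() bump now happen inside attach_eb_folio_to_filemap() under mapping->i_private_lock, the same lock the detach path takes, so a racing release can no longer observe a half-attached folio and drop its reference twice. A rough userspace analogue of that locking discipline (invented names, not kernel code; no threads are spawned, the point is the discipline itself):

/*
 * Rough userspace analogue (invented names, not kernel code): publishing the
 * private pointer, taking the reference it owns, and bumping the per-folio eb
 * counter all happen under the same lock the release path uses.
 */
#include <pthread.h>
#include <stdio.h>

struct folio_like {
	pthread_mutex_t i_private_lock;	/* stands in for mapping->i_private_lock */
	void *private;			/* stands in for folio private */
	int eb_refs;			/* stands in for the subpage eb_refs count */
	int refcount;			/* stands in for the folio refcount */
};

static void folio_like_put(struct folio_like *f)
{
	if (--f->refcount == 0)
		printf("folio released\n");
}

/* Attach: publish private and take its reference atomically vs. detach. */
static void attach_eb(struct folio_like *f, void *eb)
{
	pthread_mutex_lock(&f->i_private_lock);
	if (!f->private) {
		f->private = eb;
		f->refcount++;	/* the private pointer owns one folio ref */
	}
	f->eb_refs++;		/* one more eb uses this folio */
	pthread_mutex_unlock(&f->i_private_lock);
}

/* Detach: only the last eb clears private and drops the ref it owned. */
static void detach_eb(struct folio_like *f)
{
	int drop;

	pthread_mutex_lock(&f->i_private_lock);
	drop = (--f->eb_refs == 0 && f->private);
	if (drop)
		f->private = NULL;
	pthread_mutex_unlock(&f->i_private_lock);
	if (drop)
		folio_like_put(f);
}

int main(void)
{
	struct folio_like f = {
		.i_private_lock = PTHREAD_MUTEX_INITIALIZER,
		.refcount = 1,
	};
	int eb = 0;

	attach_eb(&f, &eb);
	attach_eb(&f, &eb);	/* a second eb sharing the folio (subpage case) */
	detach_eb(&f);		/* not the last user: keeps private and its ref */
	detach_eb(&f);		/* last user: clears private, drops its ref */
	folio_like_put(&f);	/* drop the initial reference */
	return 0;
}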
fs/btrfs/tree-log.c
@@ -4860,18 +4860,23 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
 			path->slots[0]++;
 			continue;
 		}
-		if (!dropped_extents) {
-			/*
-			 * Avoid logging extent items logged in past fsync calls
-			 * and leading to duplicate keys in the log tree.
-			 */
+		/*
+		 * Avoid overlapping items in the log tree. The first time we
+		 * get here, get rid of everything from a past fsync. After
+		 * that, if the current extent starts before the end of the last
+		 * extent we copied, truncate the last one. This can happen if
+		 * an ordered extent completion modifies the subvolume tree
+		 * while btrfs_next_leaf() has the tree unlocked.
+		 */
+		if (!dropped_extents || key.offset < truncate_offset) {
 			ret = truncate_inode_items(trans, root->log_root, inode,
-						   truncate_offset,
+						   min(key.offset, truncate_offset),
 						   BTRFS_EXTENT_DATA_KEY);
 			if (ret)
 				goto out;
 			dropped_extents = true;
 		}
+		truncate_offset = btrfs_file_extent_end(path);
 		if (ins_nr == 0)
 			start_slot = slot;
 		ins_nr++;
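The rule the hunk above adds is small: remember where the last copied extent ends (truncate_offset = btrfs_file_extent_end(path)) and, if the next extent starts before that, truncate the log back to min(key.offset, truncate_offset) before copying, so no two items in the log overlap. A standalone sketch of that bookkeeping (invented types and offsets, not btrfs code):

/*
 * Standalone sketch of the bookkeeping above (invented types and offsets,
 * not btrfs code). The log is modelled as a single contiguous tail of copied
 * items; the assert checks the invariant the fix restores: a newly copied
 * extent never overlaps what is already in the log.
 */
#include <assert.h>
#include <stdio.h>

struct extent { unsigned long long start, end; };

int main(void)
{
	/*
	 * The second extent starts before the first one ends, as can happen
	 * when an ordered extent completion changes the subvolume tree while
	 * it is unlocked between leaves.
	 */
	const struct extent found[] = { { 262144, 1048576 }, { 524288, 1310720 } };
	const int nr = sizeof(found) / sizeof(found[0]);
	unsigned long long truncate_offset = 262144;	/* log from here (i_size) */
	unsigned long long logged_end = 0;		/* end of items in the log */
	int dropped_extents = 0;

	for (int i = 0; i < nr; i++) {
		unsigned long long start = found[i].start;

		if (!dropped_extents || start < truncate_offset) {
			/* min(key.offset, truncate_offset) in the real code */
			unsigned long long cut =
				start < truncate_offset ? start : truncate_offset;

			if (logged_end > cut)
				logged_end = cut;	/* truncate_inode_items() */
			dropped_extents = 1;
		}
		truncate_offset = found[i].end;		/* btrfs_file_extent_end() */

		assert(start >= logged_end);	/* no overlap, no duplicate key */
		logged_end = found[i].end;	/* copy the item into the log */
		printf("logged [%llu, %llu)\n", start, logged_end);
	}
	return 0;
}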