Commit 3669558b authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-6.6-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:

 - several fixes for handling directory item (inserting, removing,
   iteration, error handling)

 - fix transaction commit stalls when auto relocation is running and
   blocks other tasks that want to commit

 - fix a build error when DEBUG is enabled

 - fix lockdep warning in inode number lookup ioctl

 - fix race when finishing block group creation

 - remove link to obsolete wiki in several files

* tag 'for-6.6-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  MAINTAINERS: remove links to obsolete btrfs.wiki.kernel.org
  btrfs: assert delayed node locked when removing delayed item
  btrfs: remove BUG() after failure to insert delayed dir index item
  btrfs: improve error message after failure to add delayed dir index item
  btrfs: fix a compilation error if DEBUG is defined in btree_dirty_folio
  btrfs: check for BTRFS_FS_ERROR in pending ordered assert
  btrfs: fix lockdep splat and potential deadlock after failure running delayed items
  btrfs: do not block starts waiting on previous transaction commit
  btrfs: release path before inode lookup during the ino lookup ioctl
  btrfs: fix race between finishing block group creation and its item update
parents 2c758cef 5facccc9
...@@ -37,7 +37,6 @@ For more information please refer to the documentation site or wiki ...@@ -37,7 +37,6 @@ For more information please refer to the documentation site or wiki
https://btrfs.readthedocs.io https://btrfs.readthedocs.io
https://btrfs.wiki.kernel.org
that maintains information about administration tasks, frequently asked that maintains information about administration tasks, frequently asked
questions, use cases, mount options, comprehensible changelogs, features, questions, use cases, mount options, comprehensible changelogs, features,
......
...@@ -4378,7 +4378,6 @@ M: David Sterba <dsterba@suse.com> ...@@ -4378,7 +4378,6 @@ M: David Sterba <dsterba@suse.com>
L: linux-btrfs@vger.kernel.org L: linux-btrfs@vger.kernel.org
S: Maintained S: Maintained
W: https://btrfs.readthedocs.io W: https://btrfs.readthedocs.io
W: https://btrfs.wiki.kernel.org/
Q: https://patchwork.kernel.org/project/linux-btrfs/list/ Q: https://patchwork.kernel.org/project/linux-btrfs/list/
C: irc://irc.libera.chat/btrfs C: irc://irc.libera.chat/btrfs
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git
......
...@@ -31,7 +31,7 @@ config BTRFS_FS ...@@ -31,7 +31,7 @@ config BTRFS_FS
continue to be mountable and usable by newer kernels. continue to be mountable and usable by newer kernels.
For more information, please see the web pages at For more information, please see the web pages at
http://btrfs.wiki.kernel.org. https://btrfs.readthedocs.io
To compile this file system support as a module, choose M here. The To compile this file system support as a module, choose M here. The
module will be called btrfs. module will be called btrfs.
......
...@@ -3028,8 +3028,16 @@ static int update_block_group_item(struct btrfs_trans_handle *trans, ...@@ -3028,8 +3028,16 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf); btrfs_mark_buffer_dirty(leaf);
fail: fail:
btrfs_release_path(path); btrfs_release_path(path);
/* We didn't update the block group item, need to revert @commit_used. */ /*
if (ret < 0) { * We didn't update the block group item, need to revert commit_used
* unless the block group item didn't exist yet - this is to prevent a
* race with a concurrent insertion of the block group item, with
* insert_block_group_item(), that happened just after we attempted to
* update. In that case we would reset commit_used to 0 just after the
* insertion set it to a value greater than 0 - if the block group later
* becomes with 0 used bytes, we would incorrectly skip its update.
*/
if (ret < 0 && ret != -ENOENT) {
spin_lock(&cache->lock); spin_lock(&cache->lock);
cache->commit_used = old_commit_used; cache->commit_used = old_commit_used;
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
......
...@@ -412,6 +412,7 @@ static void finish_one_item(struct btrfs_delayed_root *delayed_root) ...@@ -412,6 +412,7 @@ static void finish_one_item(struct btrfs_delayed_root *delayed_root)
static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
{ {
struct btrfs_delayed_node *delayed_node = delayed_item->delayed_node;
struct rb_root_cached *root; struct rb_root_cached *root;
struct btrfs_delayed_root *delayed_root; struct btrfs_delayed_root *delayed_root;
...@@ -419,18 +420,21 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item) ...@@ -419,18 +420,21 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
if (RB_EMPTY_NODE(&delayed_item->rb_node)) if (RB_EMPTY_NODE(&delayed_item->rb_node))
return; return;
delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root; /* If it's in a rbtree, then we need to have delayed node locked. */
lockdep_assert_held(&delayed_node->mutex);
delayed_root = delayed_node->root->fs_info->delayed_root;
BUG_ON(!delayed_root); BUG_ON(!delayed_root);
if (delayed_item->type == BTRFS_DELAYED_INSERTION_ITEM) if (delayed_item->type == BTRFS_DELAYED_INSERTION_ITEM)
root = &delayed_item->delayed_node->ins_root; root = &delayed_node->ins_root;
else else
root = &delayed_item->delayed_node->del_root; root = &delayed_node->del_root;
rb_erase_cached(&delayed_item->rb_node, root); rb_erase_cached(&delayed_item->rb_node, root);
RB_CLEAR_NODE(&delayed_item->rb_node); RB_CLEAR_NODE(&delayed_item->rb_node);
delayed_item->delayed_node->count--; delayed_node->count--;
finish_one_item(delayed_root); finish_one_item(delayed_root);
} }
...@@ -1153,20 +1157,33 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr) ...@@ -1153,20 +1157,33 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
ret = __btrfs_commit_inode_delayed_items(trans, path, ret = __btrfs_commit_inode_delayed_items(trans, path,
curr_node); curr_node);
if (ret) { if (ret) {
btrfs_release_delayed_node(curr_node);
curr_node = NULL;
btrfs_abort_transaction(trans, ret); btrfs_abort_transaction(trans, ret);
break; break;
} }
prev_node = curr_node; prev_node = curr_node;
curr_node = btrfs_next_delayed_node(curr_node); curr_node = btrfs_next_delayed_node(curr_node);
/*
* See the comment below about releasing path before releasing
* node. If the commit of delayed items was successful the path
* should always be released, but in case of an error, it may
* point to locked extent buffers (a leaf at the very least).
*/
ASSERT(path->nodes[0] == NULL);
btrfs_release_delayed_node(prev_node); btrfs_release_delayed_node(prev_node);
} }
/*
* Release the path to avoid a potential deadlock and lockdep splat when
* releasing the delayed node, as that requires taking the delayed node's
* mutex. If another task starts running delayed items before we take
* the mutex, it will first lock the mutex and then it may try to lock
* the same btree path (leaf).
*/
btrfs_free_path(path);
if (curr_node) if (curr_node)
btrfs_release_delayed_node(curr_node); btrfs_release_delayed_node(curr_node);
btrfs_free_path(path);
trans->block_rsv = block_rsv; trans->block_rsv = block_rsv;
return ret; return ret;
...@@ -1413,7 +1430,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info) ...@@ -1413,7 +1430,29 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH); btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
} }
/* Will return 0 or -ENOMEM */ static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1);
if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
return;
/*
* Adding the new dir index item does not require touching another
* leaf, so we can release 1 unit of metadata that was previously
* reserved when starting the transaction. This applies only to
* the case where we had a transaction start and excludes the
* transaction join case (when replaying log trees).
*/
trace_btrfs_space_reservation(fs_info, "transaction",
trans->transid, bytes, 0);
btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
ASSERT(trans->bytes_reserved >= bytes);
trans->bytes_reserved -= bytes;
}
/* Will return 0, -ENOMEM or -EEXIST (index number collision, unexpected). */
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
const char *name, int name_len, const char *name, int name_len,
struct btrfs_inode *dir, struct btrfs_inode *dir,
...@@ -1455,6 +1494,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, ...@@ -1455,6 +1494,27 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
mutex_lock(&delayed_node->mutex); mutex_lock(&delayed_node->mutex);
/*
* First attempt to insert the delayed item. This is to make the error
* handling path simpler in case we fail (-EEXIST). There's no risk of
* any other task coming in and running the delayed item before we do
* the metadata space reservation below, because we are holding the
* delayed node's mutex and that mutex must also be locked before the
* node's delayed items can be run.
*/
ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
if (unlikely(ret)) {
btrfs_err(trans->fs_info,
"error adding delayed dir index item, name: %.*s, index: %llu, root: %llu, dir: %llu, dir->index_cnt: %llu, delayed_node->index_cnt: %llu, error: %d",
name_len, name, index, btrfs_root_id(delayed_node->root),
delayed_node->inode_id, dir->index_cnt,
delayed_node->index_cnt, ret);
btrfs_release_delayed_item(delayed_item);
btrfs_release_dir_index_item_space(trans);
mutex_unlock(&delayed_node->mutex);
goto release_node;
}
if (delayed_node->index_item_leaves == 0 || if (delayed_node->index_item_leaves == 0 ||
delayed_node->curr_index_batch_size + data_len > leaf_data_size) { delayed_node->curr_index_batch_size + data_len > leaf_data_size) {
delayed_node->curr_index_batch_size = data_len; delayed_node->curr_index_batch_size = data_len;
...@@ -1472,36 +1532,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans, ...@@ -1472,36 +1532,14 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
* impossible. * impossible.
*/ */
if (WARN_ON(ret)) { if (WARN_ON(ret)) {
mutex_unlock(&delayed_node->mutex);
btrfs_release_delayed_item(delayed_item); btrfs_release_delayed_item(delayed_item);
mutex_unlock(&delayed_node->mutex);
goto release_node; goto release_node;
} }
delayed_node->index_item_leaves++; delayed_node->index_item_leaves++;
} else if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) { } else {
const u64 bytes = btrfs_calc_insert_metadata_size(fs_info, 1); btrfs_release_dir_index_item_space(trans);
/*
* Adding the new dir index item does not require touching another
* leaf, so we can release 1 unit of metadata that was previously
* reserved when starting the transaction. This applies only to
* the case where we had a transaction start and excludes the
* transaction join case (when replaying log trees).
*/
trace_btrfs_space_reservation(fs_info, "transaction",
trans->transid, bytes, 0);
btrfs_block_rsv_release(fs_info, trans->block_rsv, bytes, NULL);
ASSERT(trans->bytes_reserved >= bytes);
trans->bytes_reserved -= bytes;
}
ret = __btrfs_add_delayed_item(delayed_node, delayed_item);
if (unlikely(ret)) {
btrfs_err(trans->fs_info,
"err add delayed dir index item(name: %.*s) into the insertion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
name_len, name, delayed_node->root->root_key.objectid,
delayed_node->inode_id, ret);
BUG();
} }
mutex_unlock(&delayed_node->mutex); mutex_unlock(&delayed_node->mutex);
......
...@@ -520,6 +520,7 @@ static bool btree_dirty_folio(struct address_space *mapping, ...@@ -520,6 +520,7 @@ static bool btree_dirty_folio(struct address_space *mapping,
struct folio *folio) struct folio *folio)
{ {
struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb); struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
struct btrfs_subpage_info *spi = fs_info->subpage_info;
struct btrfs_subpage *subpage; struct btrfs_subpage *subpage;
struct extent_buffer *eb; struct extent_buffer *eb;
int cur_bit = 0; int cur_bit = 0;
...@@ -533,18 +534,19 @@ static bool btree_dirty_folio(struct address_space *mapping, ...@@ -533,18 +534,19 @@ static bool btree_dirty_folio(struct address_space *mapping,
btrfs_assert_tree_write_locked(eb); btrfs_assert_tree_write_locked(eb);
return filemap_dirty_folio(mapping, folio); return filemap_dirty_folio(mapping, folio);
} }
ASSERT(spi);
subpage = folio_get_private(folio); subpage = folio_get_private(folio);
ASSERT(subpage->dirty_bitmap); for (cur_bit = spi->dirty_offset;
while (cur_bit < BTRFS_SUBPAGE_BITMAP_SIZE) { cur_bit < spi->dirty_offset + spi->bitmap_nr_bits;
cur_bit++) {
unsigned long flags; unsigned long flags;
u64 cur; u64 cur;
u16 tmp = (1 << cur_bit);
spin_lock_irqsave(&subpage->lock, flags); spin_lock_irqsave(&subpage->lock, flags);
if (!(tmp & subpage->dirty_bitmap)) { if (!test_bit(cur_bit, subpage->bitmaps)) {
spin_unlock_irqrestore(&subpage->lock, flags); spin_unlock_irqrestore(&subpage->lock, flags);
cur_bit++;
continue; continue;
} }
spin_unlock_irqrestore(&subpage->lock, flags); spin_unlock_irqrestore(&subpage->lock, flags);
...@@ -557,7 +559,7 @@ static bool btree_dirty_folio(struct address_space *mapping, ...@@ -557,7 +559,7 @@ static bool btree_dirty_folio(struct address_space *mapping,
btrfs_assert_tree_write_locked(eb); btrfs_assert_tree_write_locked(eb);
free_extent_buffer(eb); free_extent_buffer(eb);
cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits); cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits) - 1;
} }
return filemap_dirty_folio(mapping, folio); return filemap_dirty_folio(mapping, folio);
} }
...@@ -1547,7 +1549,7 @@ static int transaction_kthread(void *arg) ...@@ -1547,7 +1549,7 @@ static int transaction_kthread(void *arg)
delta = ktime_get_seconds() - cur->start_time; delta = ktime_get_seconds() - cur->start_time;
if (!test_and_clear_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags) && if (!test_and_clear_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags) &&
cur->state < TRANS_STATE_COMMIT_START && cur->state < TRANS_STATE_COMMIT_PREP &&
delta < fs_info->commit_interval) { delta < fs_info->commit_interval) {
spin_unlock(&fs_info->trans_lock); spin_unlock(&fs_info->trans_lock);
delay -= msecs_to_jiffies((delta - 1) * 1000); delay -= msecs_to_jiffies((delta - 1) * 1000);
...@@ -2682,8 +2684,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) ...@@ -2682,8 +2684,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters); btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters);
btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered); btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered);
btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent); btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent);
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_start, btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_prep,
BTRFS_LOCKDEP_TRANS_COMMIT_START); BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked, btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked,
BTRFS_LOCKDEP_TRANS_UNBLOCKED); BTRFS_LOCKDEP_TRANS_UNBLOCKED);
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_super_committed, btrfs_state_lockdep_init_map(fs_info, btrfs_trans_super_committed,
...@@ -4870,7 +4872,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) ...@@ -4870,7 +4872,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
while (!list_empty(&fs_info->trans_list)) { while (!list_empty(&fs_info->trans_list)) {
t = list_first_entry(&fs_info->trans_list, t = list_first_entry(&fs_info->trans_list,
struct btrfs_transaction, list); struct btrfs_transaction, list);
if (t->state >= TRANS_STATE_COMMIT_START) { if (t->state >= TRANS_STATE_COMMIT_PREP) {
refcount_inc(&t->use_count); refcount_inc(&t->use_count);
spin_unlock(&fs_info->trans_lock); spin_unlock(&fs_info->trans_lock);
btrfs_wait_for_commit(fs_info, t->transid); btrfs_wait_for_commit(fs_info, t->transid);
......
...@@ -1958,6 +1958,13 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap, ...@@ -1958,6 +1958,13 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
goto out_put; goto out_put;
} }
/*
* We don't need the path anymore, so release it and
* avoid deadlocks and lockdep warnings in case
* btrfs_iget() needs to lookup the inode from its root
* btree and lock the same leaf.
*/
btrfs_release_path(path);
temp_inode = btrfs_iget(sb, key2.objectid, root); temp_inode = btrfs_iget(sb, key2.objectid, root);
if (IS_ERR(temp_inode)) { if (IS_ERR(temp_inode)) {
ret = PTR_ERR(temp_inode); ret = PTR_ERR(temp_inode);
...@@ -1978,7 +1985,6 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap, ...@@ -1978,7 +1985,6 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
goto out_put; goto out_put;
} }
btrfs_release_path(path);
key.objectid = key.offset; key.objectid = key.offset;
key.offset = (u64)-1; key.offset = (u64)-1;
dirid = key.objectid; dirid = key.objectid;
......
...@@ -79,7 +79,7 @@ enum btrfs_lock_nesting { ...@@ -79,7 +79,7 @@ enum btrfs_lock_nesting {
}; };
enum btrfs_lockdep_trans_states { enum btrfs_lockdep_trans_states {
BTRFS_LOCKDEP_TRANS_COMMIT_START, BTRFS_LOCKDEP_TRANS_COMMIT_PREP,
BTRFS_LOCKDEP_TRANS_UNBLOCKED, BTRFS_LOCKDEP_TRANS_UNBLOCKED,
BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED,
BTRFS_LOCKDEP_TRANS_COMPLETED, BTRFS_LOCKDEP_TRANS_COMPLETED,
......
...@@ -639,7 +639,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, ...@@ -639,7 +639,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
refcount_inc(&trans->use_count); refcount_inc(&trans->use_count);
spin_unlock(&fs_info->trans_lock); spin_unlock(&fs_info->trans_lock);
ASSERT(trans); ASSERT(trans || BTRFS_FS_ERROR(fs_info));
if (trans) { if (trans) {
if (atomic_dec_and_test(&trans->pending_ordered)) if (atomic_dec_and_test(&trans->pending_ordered))
wake_up(&trans->pending_wait); wake_up(&trans->pending_wait);
......
...@@ -56,12 +56,17 @@ static struct kmem_cache *btrfs_trans_handle_cachep; ...@@ -56,12 +56,17 @@ static struct kmem_cache *btrfs_trans_handle_cachep;
* | Call btrfs_commit_transaction() on any trans handle attached to * | Call btrfs_commit_transaction() on any trans handle attached to
* | transaction N * | transaction N
* V * V
* Transaction N [[TRANS_STATE_COMMIT_START]] * Transaction N [[TRANS_STATE_COMMIT_PREP]]
* |
* | If there are simultaneous calls to btrfs_commit_transaction() one will win
* | the race and the rest will wait for the winner to commit the transaction.
* | * |
* | Will wait for previous running transaction to completely finish if there * | The winner will wait for previous running transaction to completely finish
* | is one * | if there is one.
* |
* Transaction N [[TRANS_STATE_COMMIT_START]]
* | * |
* | Then one of the following happes: * | Then one of the following happens:
* | - Wait for all other trans handle holders to release. * | - Wait for all other trans handle holders to release.
* | The btrfs_commit_transaction() caller will do the commit work. * | The btrfs_commit_transaction() caller will do the commit work.
* | - Wait for current transaction to be committed by others. * | - Wait for current transaction to be committed by others.
...@@ -112,6 +117,7 @@ static struct kmem_cache *btrfs_trans_handle_cachep; ...@@ -112,6 +117,7 @@ static struct kmem_cache *btrfs_trans_handle_cachep;
*/ */
static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
[TRANS_STATE_RUNNING] = 0U, [TRANS_STATE_RUNNING] = 0U,
[TRANS_STATE_COMMIT_PREP] = 0U,
[TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH), [TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH),
[TRANS_STATE_COMMIT_DOING] = (__TRANS_START | [TRANS_STATE_COMMIT_DOING] = (__TRANS_START |
__TRANS_ATTACH | __TRANS_ATTACH |
...@@ -1982,7 +1988,7 @@ void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans) ...@@ -1982,7 +1988,7 @@ void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans)
* Wait for the current transaction commit to start and block * Wait for the current transaction commit to start and block
* subsequent transaction joins * subsequent transaction joins
*/ */
btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START); btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
wait_event(fs_info->transaction_blocked_wait, wait_event(fs_info->transaction_blocked_wait,
cur_trans->state >= TRANS_STATE_COMMIT_START || cur_trans->state >= TRANS_STATE_COMMIT_START ||
TRANS_ABORTED(cur_trans)); TRANS_ABORTED(cur_trans));
...@@ -2129,7 +2135,7 @@ static void add_pending_snapshot(struct btrfs_trans_handle *trans) ...@@ -2129,7 +2135,7 @@ static void add_pending_snapshot(struct btrfs_trans_handle *trans)
return; return;
lockdep_assert_held(&trans->fs_info->trans_lock); lockdep_assert_held(&trans->fs_info->trans_lock);
ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START); ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_PREP);
list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots); list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
} }
...@@ -2153,7 +2159,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) ...@@ -2153,7 +2159,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
ktime_t interval; ktime_t interval;
ASSERT(refcount_read(&trans->use_count) == 1); ASSERT(refcount_read(&trans->use_count) == 1);
btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START); btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
clear_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags); clear_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
...@@ -2213,7 +2219,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) ...@@ -2213,7 +2219,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
} }
spin_lock(&fs_info->trans_lock); spin_lock(&fs_info->trans_lock);
if (cur_trans->state >= TRANS_STATE_COMMIT_START) { if (cur_trans->state >= TRANS_STATE_COMMIT_PREP) {
enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED; enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
add_pending_snapshot(trans); add_pending_snapshot(trans);
...@@ -2225,7 +2231,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) ...@@ -2225,7 +2231,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
want_state = TRANS_STATE_SUPER_COMMITTED; want_state = TRANS_STATE_SUPER_COMMITTED;
btrfs_trans_state_lockdep_release(fs_info, btrfs_trans_state_lockdep_release(fs_info,
BTRFS_LOCKDEP_TRANS_COMMIT_START); BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
ret = btrfs_end_transaction(trans); ret = btrfs_end_transaction(trans);
wait_for_commit(cur_trans, want_state); wait_for_commit(cur_trans, want_state);
...@@ -2237,9 +2243,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) ...@@ -2237,9 +2243,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
return ret; return ret;
} }
cur_trans->state = TRANS_STATE_COMMIT_START; cur_trans->state = TRANS_STATE_COMMIT_PREP;
wake_up(&fs_info->transaction_blocked_wait); wake_up(&fs_info->transaction_blocked_wait);
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START); btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
if (cur_trans->list.prev != &fs_info->trans_list) { if (cur_trans->list.prev != &fs_info->trans_list) {
enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED; enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
...@@ -2260,11 +2266,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) ...@@ -2260,11 +2266,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
btrfs_put_transaction(prev_trans); btrfs_put_transaction(prev_trans);
if (ret) if (ret)
goto lockdep_release; goto lockdep_release;
} else { spin_lock(&fs_info->trans_lock);
spin_unlock(&fs_info->trans_lock);
} }
} else { } else {
spin_unlock(&fs_info->trans_lock);
/* /*
* The previous transaction was aborted and was already removed * The previous transaction was aborted and was already removed
* from the list of transactions at fs_info->trans_list. So we * from the list of transactions at fs_info->trans_list. So we
...@@ -2272,11 +2276,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) ...@@ -2272,11 +2276,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* corrupt state (pointing to trees with unwritten nodes/leafs). * corrupt state (pointing to trees with unwritten nodes/leafs).
*/ */
if (BTRFS_FS_ERROR(fs_info)) { if (BTRFS_FS_ERROR(fs_info)) {
spin_unlock(&fs_info->trans_lock);
ret = -EROFS; ret = -EROFS;
goto lockdep_release; goto lockdep_release;
} }
} }
cur_trans->state = TRANS_STATE_COMMIT_START;
wake_up(&fs_info->transaction_blocked_wait);
spin_unlock(&fs_info->trans_lock);
/* /*
* Get the time spent on the work done by the commit thread and not * Get the time spent on the work done by the commit thread and not
* the time spent waiting on a previous commit * the time spent waiting on a previous commit
...@@ -2586,7 +2595,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) ...@@ -2586,7 +2595,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
goto cleanup_transaction; goto cleanup_transaction;
lockdep_trans_commit_start_release: lockdep_trans_commit_start_release:
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START); btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
btrfs_end_transaction(trans); btrfs_end_transaction(trans);
return ret; return ret;
} }
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
enum btrfs_trans_state { enum btrfs_trans_state {
TRANS_STATE_RUNNING, TRANS_STATE_RUNNING,
TRANS_STATE_COMMIT_PREP,
TRANS_STATE_COMMIT_START, TRANS_STATE_COMMIT_START,
TRANS_STATE_COMMIT_DOING, TRANS_STATE_COMMIT_DOING,
TRANS_STATE_UNBLOCKED, TRANS_STATE_UNBLOCKED,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment