Commit e33e17ee authored by Jeff Mahoney's avatar Jeff Mahoney Committed by Chris Mason

btrfs: add missing discards when unpinning extents with -o discard

When we clear the dirty bits in btrfs_delete_unused_bgs for extents
in the empty block group, it results in btrfs_finish_extent_commit being
unable to discard the freed extents.

The block group removal patch added an alternate path to forget extents
other than btrfs_finish_extent_commit.  As a result, any extents that
would be freed when the block group is removed aren't discarded.  In my
test run, with a large copy of mixed sized files followed by removal, it
left nearly 2/3 of extents undiscarded.

To clean up the block groups, we add the removed block group onto a list
that will be discarded after transaction commit.
Signed-off-by: default avatarJeff Mahoney <jeffm@suse.com>
Reviewed-by: default avatarFilipe Manana <fdmanana@suse.com>
Tested-by: default avatarFilipe Manana <fdmanana@suse.com>
Signed-off-by: default avatarChris Mason <clm@fb.com>
parent e44163e1
...@@ -3437,6 +3437,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ...@@ -3437,6 +3437,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start, struct btrfs_root *root, u64 group_start,
struct extent_map *em); struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info); void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root); struct btrfs_root *root);
u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
...@@ -4073,6 +4075,7 @@ __cold ...@@ -4073,6 +4075,7 @@ __cold
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...); unsigned int line, int errno, const char *fmt, ...);
const char *btrfs_decode_error(int errno);
__cold __cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
......
...@@ -6131,20 +6131,19 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, ...@@ -6131,20 +6131,19 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root) struct btrfs_root *root)
{ {
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_group_cache *block_group, *tmp;
struct list_head *deleted_bgs;
struct extent_io_tree *unpin; struct extent_io_tree *unpin;
u64 start; u64 start;
u64 end; u64 end;
int ret; int ret;
if (trans->aborted)
return 0;
if (fs_info->pinned_extents == &fs_info->freed_extents[0]) if (fs_info->pinned_extents == &fs_info->freed_extents[0])
unpin = &fs_info->freed_extents[1]; unpin = &fs_info->freed_extents[1];
else else
unpin = &fs_info->freed_extents[0]; unpin = &fs_info->freed_extents[0];
while (1) { while (!trans->aborted) {
mutex_lock(&fs_info->unused_bg_unpin_mutex); mutex_lock(&fs_info->unused_bg_unpin_mutex);
ret = find_first_extent_bit(unpin, 0, &start, &end, ret = find_first_extent_bit(unpin, 0, &start, &end,
EXTENT_DIRTY, NULL); EXTENT_DIRTY, NULL);
...@@ -6163,6 +6162,34 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, ...@@ -6163,6 +6162,34 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
cond_resched(); cond_resched();
} }
/*
* Transaction is finished. We don't need the lock anymore. We
* do need to clean up the block groups in case of a transaction
* abort.
*/
deleted_bgs = &trans->transaction->deleted_bgs;
list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
u64 trimmed = 0;
ret = -EROFS;
if (!trans->aborted)
ret = btrfs_discard_extent(root,
block_group->key.objectid,
block_group->key.offset,
&trimmed);
list_del_init(&block_group->bg_list);
btrfs_put_block_group_trimming(block_group);
btrfs_put_block_group(block_group);
if (ret) {
const char *errstr = btrfs_decode_error(ret);
btrfs_warn(fs_info,
"Discard failed while removing blockgroup: errno=%d %s\n",
ret, errstr);
}
}
return 0; return 0;
} }
...@@ -9903,6 +9930,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, ...@@ -9903,6 +9930,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* currently running transaction might finish and a new one start, * currently running transaction might finish and a new one start,
* allowing for new block groups to be created that can reuse the same * allowing for new block groups to be created that can reuse the same
* physical device locations unless we take this special care. * physical device locations unless we take this special care.
*
* There may also be an implicit trim operation if the file system
* is mounted with -odiscard. The same protections must remain
* in place until the extents have been discarded completely when
* the transaction commit has completed.
*/ */
remove_em = (atomic_read(&block_group->trimming) == 0); remove_em = (atomic_read(&block_group->trimming) == 0);
/* /*
...@@ -9977,6 +10009,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) ...@@ -9977,6 +10009,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_lock(&fs_info->unused_bgs_lock); spin_lock(&fs_info->unused_bgs_lock);
while (!list_empty(&fs_info->unused_bgs)) { while (!list_empty(&fs_info->unused_bgs)) {
u64 start, end; u64 start, end;
int trimming;
block_group = list_first_entry(&fs_info->unused_bgs, block_group = list_first_entry(&fs_info->unused_bgs,
struct btrfs_block_group_cache, struct btrfs_block_group_cache,
...@@ -10076,12 +10109,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) ...@@ -10076,12 +10109,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_unlock(&block_group->lock); spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock); spin_unlock(&space_info->lock);
/* DISCARD can flip during remount */
trimming = btrfs_test_opt(root, DISCARD);
/* Implicit trim during transaction commit. */
if (trimming)
btrfs_get_block_group_trimming(block_group);
/* /*
* Btrfs_remove_chunk will abort the transaction if things go * Btrfs_remove_chunk will abort the transaction if things go
* horribly wrong. * horribly wrong.
*/ */
ret = btrfs_remove_chunk(trans, root, ret = btrfs_remove_chunk(trans, root,
block_group->key.objectid); block_group->key.objectid);
if (ret) {
if (trimming)
btrfs_put_block_group_trimming(block_group);
goto end_trans;
}
/*
* If we're not mounted with -odiscard, we can just forget
* about this block group. Otherwise we'll need to wait
* until transaction commit to do the actual discard.
*/
if (trimming) {
WARN_ON(!list_empty(&block_group->bg_list));
spin_lock(&trans->transaction->deleted_bgs_lock);
list_move(&block_group->bg_list,
&trans->transaction->deleted_bgs);
spin_unlock(&trans->transaction->deleted_bgs_lock);
btrfs_get_block_group(block_group);
}
end_trans: end_trans:
btrfs_end_transaction(trans, root); btrfs_end_transaction(trans, root);
next: next:
......
...@@ -3272,35 +3272,23 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group, ...@@ -3272,35 +3272,23 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
return ret; return ret;
} }
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache)
u64 *trimmed, u64 start, u64 end, u64 minlen)
{ {
int ret; atomic_inc(&cache->trimming);
}
*trimmed = 0; void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
{
struct extent_map_tree *em_tree;
struct extent_map *em;
bool cleanup;
spin_lock(&block_group->lock); spin_lock(&block_group->lock);
if (block_group->removed) { cleanup = (atomic_dec_and_test(&block_group->trimming) &&
spin_unlock(&block_group->lock); block_group->removed);
return 0;
}
atomic_inc(&block_group->trimming);
spin_unlock(&block_group->lock); spin_unlock(&block_group->lock);
ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); if (cleanup) {
if (ret)
goto out;
ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
out:
spin_lock(&block_group->lock);
if (atomic_dec_and_test(&block_group->trimming) &&
block_group->removed) {
struct extent_map_tree *em_tree;
struct extent_map *em;
spin_unlock(&block_group->lock);
lock_chunks(block_group->fs_info->chunk_root); lock_chunks(block_group->fs_info->chunk_root);
em_tree = &block_group->fs_info->mapping_tree.map_tree; em_tree = &block_group->fs_info->mapping_tree.map_tree;
write_lock(&em_tree->lock); write_lock(&em_tree->lock);
...@@ -3324,10 +3312,31 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, ...@@ -3324,10 +3312,31 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
* this block group have left 1 entry each one. Free them. * this block group have left 1 entry each one. Free them.
*/ */
__btrfs_remove_free_space_cache(block_group->free_space_ctl); __btrfs_remove_free_space_cache(block_group->free_space_ctl);
} else { }
}
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen)
{
int ret;
*trimmed = 0;
spin_lock(&block_group->lock);
if (block_group->removed) {
spin_unlock(&block_group->lock); spin_unlock(&block_group->lock);
return 0;
} }
btrfs_get_block_group_trimming(block_group);
spin_unlock(&block_group->lock);
ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
if (ret)
goto out;
ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
out:
btrfs_put_block_group_trimming(block_group);
return ret; return ret;
} }
......
...@@ -69,7 +69,7 @@ static struct file_system_type btrfs_fs_type; ...@@ -69,7 +69,7 @@ static struct file_system_type btrfs_fs_type;
static int btrfs_remount(struct super_block *sb, int *flags, char *data); static int btrfs_remount(struct super_block *sb, int *flags, char *data);
static const char *btrfs_decode_error(int errno) const char *btrfs_decode_error(int errno)
{ {
char *errstr = "unknown"; char *errstr = "unknown";
......
...@@ -258,6 +258,8 @@ static noinline int join_transaction(struct btrfs_root *root, unsigned int type) ...@@ -258,6 +258,8 @@ static noinline int join_transaction(struct btrfs_root *root, unsigned int type)
mutex_init(&cur_trans->cache_write_mutex); mutex_init(&cur_trans->cache_write_mutex);
cur_trans->num_dirty_bgs = 0; cur_trans->num_dirty_bgs = 0;
spin_lock_init(&cur_trans->dirty_bgs_lock); spin_lock_init(&cur_trans->dirty_bgs_lock);
INIT_LIST_HEAD(&cur_trans->deleted_bgs);
spin_lock_init(&cur_trans->deleted_bgs_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list); list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(&cur_trans->dirty_pages, extent_io_tree_init(&cur_trans->dirty_pages,
fs_info->btree_inode->i_mapping); fs_info->btree_inode->i_mapping);
......
...@@ -74,6 +74,8 @@ struct btrfs_transaction { ...@@ -74,6 +74,8 @@ struct btrfs_transaction {
*/ */
struct mutex cache_write_mutex; struct mutex cache_write_mutex;
spinlock_t dirty_bgs_lock; spinlock_t dirty_bgs_lock;
struct list_head deleted_bgs;
spinlock_t deleted_bgs_lock;
struct btrfs_delayed_ref_root delayed_refs; struct btrfs_delayed_ref_root delayed_refs;
int aborted; int aborted;
int dirty_bg_run; int dirty_bg_run;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment