Commit d0bd4560 authored by Josef Bacik's avatar Josef Bacik Committed by Chris Mason

Btrfs: add fragment=* debug mount option

In tracking down these weird bitmap problems it was helpful to artificially
create an extremely fragmented file system.  These mount options let us either
fragment data or metadata or both.  With these options I could reproduce all
sorts of weird latencies and hangs that occur under extreme fragmentation and
get them fixed.  Thanks,
Signed-off-by: default avatarJosef Bacik <jbacik@fb.com>
Signed-off-by: default avatarChris Mason <clm@fb.com>
parent d9ee522b
...@@ -2145,6 +2145,8 @@ struct btrfs_ioctl_defrag_range_args { ...@@ -2145,6 +2145,8 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
#define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22) #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22)
#define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23) #define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23)
#define BTRFS_MOUNT_FRAGMENT_DATA (1 << 24)
#define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25)
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30) #define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
#define BTRFS_DEFAULT_MAX_INLINE (8192) #define BTRFS_DEFAULT_MAX_INLINE (8192)
...@@ -2169,6 +2171,18 @@ struct btrfs_ioctl_defrag_range_args { ...@@ -2169,6 +2171,18 @@ struct btrfs_ioctl_defrag_range_args {
btrfs_clear_opt(root->fs_info->mount_opt, opt); \ btrfs_clear_opt(root->fs_info->mount_opt, opt); \
} }
#ifdef CONFIG_BTRFS_DEBUG
static inline int
btrfs_should_fragment_free_space(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group)
{
return (btrfs_test_opt(root, FRAGMENT_METADATA) &&
block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
(btrfs_test_opt(root, FRAGMENT_DATA) &&
block_group->flags & BTRFS_BLOCK_GROUP_DATA);
}
#endif
/* /*
* Requests for changes that need to be done during transaction commit. * Requests for changes that need to be done during transaction commit.
* *
......
...@@ -332,6 +332,27 @@ static void put_caching_control(struct btrfs_caching_control *ctl) ...@@ -332,6 +332,27 @@ static void put_caching_control(struct btrfs_caching_control *ctl)
kfree(ctl); kfree(ctl);
} }
#ifdef CONFIG_BTRFS_DEBUG
static void fragment_free_space(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group)
{
u64 start = block_group->key.objectid;
u64 len = block_group->key.offset;
u64 chunk = block_group->flags & BTRFS_BLOCK_GROUP_METADATA ?
root->nodesize : root->sectorsize;
u64 step = chunk << 1;
while (len > chunk) {
btrfs_remove_free_space(block_group, start, chunk);
start += step;
if (len < step)
len = 0;
else
len -= step;
}
}
#endif
/* /*
* this is only called by cache_block_group, since we could have freed extents * this is only called by cache_block_group, since we could have freed extents
* we need to check the pinned_extents for any extents that can't be used yet * we need to check the pinned_extents for any extents that can't be used yet
...@@ -388,6 +409,7 @@ static noinline void caching_thread(struct btrfs_work *work) ...@@ -388,6 +409,7 @@ static noinline void caching_thread(struct btrfs_work *work)
u64 last = 0; u64 last = 0;
u32 nritems; u32 nritems;
int ret = -ENOMEM; int ret = -ENOMEM;
bool wakeup = true;
caching_ctl = container_of(work, struct btrfs_caching_control, work); caching_ctl = container_of(work, struct btrfs_caching_control, work);
block_group = caching_ctl->block_group; block_group = caching_ctl->block_group;
...@@ -400,6 +422,15 @@ static noinline void caching_thread(struct btrfs_work *work) ...@@ -400,6 +422,15 @@ static noinline void caching_thread(struct btrfs_work *work)
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
#ifdef CONFIG_BTRFS_DEBUG
/*
* If we're fragmenting we don't want to make anybody think we can
* allocate from this block group until we've had a chance to fragment
* the free space.
*/
if (btrfs_should_fragment_free_space(extent_root, block_group))
wakeup = false;
#endif
/* /*
* We don't want to deadlock with somebody trying to allocate a new * We don't want to deadlock with somebody trying to allocate a new
* extent for the extent root while also trying to search the extent * extent for the extent root while also trying to search the extent
...@@ -441,6 +472,7 @@ static noinline void caching_thread(struct btrfs_work *work) ...@@ -441,6 +472,7 @@ static noinline void caching_thread(struct btrfs_work *work)
if (need_resched() || if (need_resched() ||
rwsem_is_contended(&fs_info->commit_root_sem)) { rwsem_is_contended(&fs_info->commit_root_sem)) {
if (wakeup)
caching_ctl->progress = last; caching_ctl->progress = last;
btrfs_release_path(path); btrfs_release_path(path);
up_read(&fs_info->commit_root_sem); up_read(&fs_info->commit_root_sem);
...@@ -464,6 +496,7 @@ static noinline void caching_thread(struct btrfs_work *work) ...@@ -464,6 +496,7 @@ static noinline void caching_thread(struct btrfs_work *work)
key.offset = 0; key.offset = 0;
key.type = BTRFS_EXTENT_ITEM_KEY; key.type = BTRFS_EXTENT_ITEM_KEY;
if (wakeup)
caching_ctl->progress = last; caching_ctl->progress = last;
btrfs_release_path(path); btrfs_release_path(path);
goto next; goto next;
...@@ -491,6 +524,7 @@ static noinline void caching_thread(struct btrfs_work *work) ...@@ -491,6 +524,7 @@ static noinline void caching_thread(struct btrfs_work *work)
if (total_found > (1024 * 1024 * 2)) { if (total_found > (1024 * 1024 * 2)) {
total_found = 0; total_found = 0;
if (wakeup)
wake_up(&caching_ctl->wait); wake_up(&caching_ctl->wait);
} }
} }
...@@ -501,13 +535,27 @@ static noinline void caching_thread(struct btrfs_work *work) ...@@ -501,13 +535,27 @@ static noinline void caching_thread(struct btrfs_work *work)
total_found += add_new_free_space(block_group, fs_info, last, total_found += add_new_free_space(block_group, fs_info, last,
block_group->key.objectid + block_group->key.objectid +
block_group->key.offset); block_group->key.offset);
caching_ctl->progress = (u64)-1;
spin_lock(&block_group->lock); spin_lock(&block_group->lock);
block_group->caching_ctl = NULL; block_group->caching_ctl = NULL;
block_group->cached = BTRFS_CACHE_FINISHED; block_group->cached = BTRFS_CACHE_FINISHED;
spin_unlock(&block_group->lock); spin_unlock(&block_group->lock);
#ifdef CONFIG_BTRFS_DEBUG
if (btrfs_should_fragment_free_space(extent_root, block_group)) {
u64 bytes_used;
spin_lock(&block_group->space_info->lock);
spin_lock(&block_group->lock);
bytes_used = block_group->key.offset -
btrfs_block_group_used(&block_group->item);
block_group->space_info->bytes_used += bytes_used >> 1;
spin_unlock(&block_group->lock);
spin_unlock(&block_group->space_info->lock);
fragment_free_space(extent_root, block_group);
}
#endif
caching_ctl->progress = (u64)-1;
err: err:
btrfs_free_path(path); btrfs_free_path(path);
up_read(&fs_info->commit_root_sem); up_read(&fs_info->commit_root_sem);
...@@ -607,6 +655,22 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, ...@@ -607,6 +655,22 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
} }
} }
spin_unlock(&cache->lock); spin_unlock(&cache->lock);
#ifdef CONFIG_BTRFS_DEBUG
if (ret == 1 &&
btrfs_should_fragment_free_space(fs_info->extent_root,
cache)) {
u64 bytes_used;
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
bytes_used = cache->key.offset -
btrfs_block_group_used(&cache->item);
cache->space_info->bytes_used += bytes_used >> 1;
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
fragment_free_space(fs_info->extent_root, cache);
}
#endif
mutex_unlock(&caching_ctl->mutex); mutex_unlock(&caching_ctl->mutex);
wake_up(&caching_ctl->wait); wake_up(&caching_ctl->wait);
...@@ -9624,6 +9688,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, ...@@ -9624,6 +9688,14 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
free_excluded_extents(root, cache); free_excluded_extents(root, cache);
#ifdef CONFIG_BTRFS_DEBUG
if (btrfs_should_fragment_free_space(root, cache)) {
u64 new_bytes_used = size - bytes_used;
bytes_used += new_bytes_used >> 1;
fragment_free_space(root, cache);
}
#endif
/* /*
* Call to ensure the corresponding space_info object is created and * Call to ensure the corresponding space_info object is created and
* assigned to our block group, but don't update its counters just yet. * assigned to our block group, but don't update its counters just yet.
......
...@@ -1951,12 +1951,19 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl, ...@@ -1951,12 +1951,19 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info) struct btrfs_free_space *info)
{ {
struct btrfs_block_group_cache *block_group = ctl->private; struct btrfs_block_group_cache *block_group = ctl->private;
bool forced = false;
#ifdef CONFIG_BTRFS_DEBUG
if (btrfs_should_fragment_free_space(block_group->fs_info->extent_root,
block_group))
forced = true;
#endif
/* /*
* If we are below the extents threshold then we can add this as an * If we are below the extents threshold then we can add this as an
* extent, and don't have to deal with the bitmap * extent, and don't have to deal with the bitmap
*/ */
if (ctl->free_extents < ctl->extents_thresh) { if (!forced && ctl->free_extents < ctl->extents_thresh) {
/* /*
* If this block group has some small extents we don't want to * If this block group has some small extents we don't want to
* use up all of our free slots in the cache with them, we want * use up all of our free slots in the cache with them, we want
......
...@@ -303,6 +303,9 @@ enum { ...@@ -303,6 +303,9 @@ enum {
Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard, Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow, Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
Opt_datasum, Opt_treelog, Opt_noinode_cache, Opt_datasum, Opt_treelog, Opt_noinode_cache,
#ifdef CONFIG_BTRFS_DEBUG
Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
#endif
Opt_err, Opt_err,
}; };
...@@ -355,6 +358,11 @@ static match_table_t tokens = { ...@@ -355,6 +358,11 @@ static match_table_t tokens = {
{Opt_rescan_uuid_tree, "rescan_uuid_tree"}, {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
{Opt_fatal_errors, "fatal_errors=%s"}, {Opt_fatal_errors, "fatal_errors=%s"},
{Opt_commit_interval, "commit=%d"}, {Opt_commit_interval, "commit=%d"},
#ifdef CONFIG_BTRFS_DEBUG
{Opt_fragment_data, "fragment=data"},
{Opt_fragment_metadata, "fragment=metadata"},
{Opt_fragment_all, "fragment=all"},
#endif
{Opt_err, NULL}, {Opt_err, NULL},
}; };
...@@ -721,6 +729,22 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) ...@@ -721,6 +729,22 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
} }
break; break;
#ifdef CONFIG_BTRFS_DEBUG
case Opt_fragment_all:
btrfs_info(root->fs_info, "fragmenting all space");
btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
break;
case Opt_fragment_metadata:
btrfs_info(root->fs_info, "fragmenting metadata");
btrfs_set_opt(info->mount_opt,
FRAGMENT_METADATA);
break;
case Opt_fragment_data:
btrfs_info(root->fs_info, "fragmenting data");
btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
break;
#endif
case Opt_err: case Opt_err:
btrfs_info(root->fs_info, "unrecognized mount option '%s'", p); btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
ret = -EINVAL; ret = -EINVAL;
...@@ -1172,6 +1196,12 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) ...@@ -1172,6 +1196,12 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",fatal_errors=panic"); seq_puts(seq, ",fatal_errors=panic");
if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL) if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
seq_printf(seq, ",commit=%d", info->commit_interval); seq_printf(seq, ",commit=%d", info->commit_interval);
#ifdef CONFIG_BTRFS_DEBUG
if (btrfs_test_opt(root, FRAGMENT_DATA))
seq_puts(seq, ",fragment=data");
if (btrfs_test_opt(root, FRAGMENT_METADATA))
seq_puts(seq, ",fragment=metadata");
#endif
seq_printf(seq, ",subvolid=%llu", seq_printf(seq, ",subvolid=%llu",
BTRFS_I(d_inode(dentry))->root->root_key.objectid); BTRFS_I(d_inode(dentry))->root->root_key.objectid);
seq_puts(seq, ",subvol="); seq_puts(seq, ",subvol=");
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include "btrfs-tests.h" #include "btrfs-tests.h"
#include "../ctree.h" #include "../ctree.h"
#include "../disk-io.h"
#include "../free-space-cache.h" #include "../free-space-cache.h"
#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
...@@ -35,6 +36,12 @@ static struct btrfs_block_group_cache *init_test_block_group(void) ...@@ -35,6 +36,12 @@ static struct btrfs_block_group_cache *init_test_block_group(void)
kfree(cache); kfree(cache);
return NULL; return NULL;
} }
cache->fs_info = btrfs_alloc_dummy_fs_info();
if (!cache->fs_info) {
kfree(cache->free_space_ctl);
kfree(cache);
return NULL;
}
cache->key.objectid = 0; cache->key.objectid = 0;
cache->key.offset = 1024 * 1024 * 1024; cache->key.offset = 1024 * 1024 * 1024;
...@@ -879,7 +886,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) ...@@ -879,7 +886,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
int btrfs_test_free_space_cache(void) int btrfs_test_free_space_cache(void)
{ {
struct btrfs_block_group_cache *cache; struct btrfs_block_group_cache *cache;
int ret; struct btrfs_root *root = NULL;
int ret = -ENOMEM;
test_msg("Running btrfs free space cache tests\n"); test_msg("Running btrfs free space cache tests\n");
...@@ -889,6 +897,17 @@ int btrfs_test_free_space_cache(void) ...@@ -889,6 +897,17 @@ int btrfs_test_free_space_cache(void)
return 0; return 0;
} }
root = btrfs_alloc_dummy_root();
if (!root)
goto out;
root->fs_info = btrfs_alloc_dummy_fs_info();
if (!root->fs_info)
goto out;
root->fs_info->extent_root = root;
cache->fs_info = root->fs_info;
ret = test_extents(cache); ret = test_extents(cache);
if (ret) if (ret)
goto out; goto out;
...@@ -904,6 +923,7 @@ int btrfs_test_free_space_cache(void) ...@@ -904,6 +923,7 @@ int btrfs_test_free_space_cache(void)
__btrfs_remove_free_space_cache(cache->free_space_ctl); __btrfs_remove_free_space_cache(cache->free_space_ctl);
kfree(cache->free_space_ctl); kfree(cache->free_space_ctl);
kfree(cache); kfree(cache);
btrfs_free_dummy_root(root);
test_msg("Free space cache tests finished\n"); test_msg("Free space cache tests finished\n");
return ret; return ret;
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment