Commit 6691d940 authored by Daeho Jeong's avatar Daeho Jeong Committed by Jaegeuk Kim

f2fs: introduce fragment allocation mode mount option

Added two options into "mode=" mount option to make it possible for
developers to simulate filesystem fragmentation/after-GC situation
itself. The developers use these modes to understand filesystem
fragmentation/after-GC condition well, and eventually get some
insights to handle them better.

"fragment:segment": f2fs allocates a new segment in a random position.
		With this, we can simulate the after-GC condition.
"fragment:block" : We can scatter block allocation with
		"max_fragment_chunk" and "max_fragment_hole" sysfs
		nodes. f2fs will allocate 1..<max_fragment_chunk>
		blocks in a chunk and make a hole in the length of
		1..<max_fragment_hole> by turns	in a newly allocated
		free segment. Plus, this mode implicitly enables
		"fragment:segment" option for more randomness.
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
parent 84eab2a8
...@@ -512,3 +512,19 @@ Date: July 2021 ...@@ -512,3 +512,19 @@ Date: July 2021
Contact: "Daeho Jeong" <daehojeong@google.com> Contact: "Daeho Jeong" <daehojeong@google.com>
Description: You can control the multiplier value of bdi device readahead window size Description: You can control the multiplier value of bdi device readahead window size
between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option. between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option.
What: /sys/fs/f2fs/<disk>/max_fragment_chunk
Date: August 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: With the "mode=fragment:block" mount option, we can scatter block allocation.
f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
in the length of 1..<max_fragment_hole> by turns. This value can be set
between 1..512 and the default value is 4.
What: /sys/fs/f2fs/<disk>/max_fragment_hole
Date: August 2021
Contact: "Daeho Jeong" <daehojeong@google.com>
Description: With the "mode=fragment:block" mount option, we can scatter block allocation.
f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
in the length of 1..<max_fragment_hole> by turns. This value can be set
between 1..512 and the default value is 4.
...@@ -201,6 +201,24 @@ fault_type=%d Support configuring fault injection type, should be ...@@ -201,6 +201,24 @@ fault_type=%d Support configuring fault injection type, should be
mode=%s Control block allocation mode which supports "adaptive" mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random and "lfs". In "lfs" mode, there should be no random
writes towards main area. writes towards main area.
"fragment:segment" and "fragment:block" are newly added here.
These are developer options for experiments to simulate filesystem
fragmentation/after-GC situation itself. The developers use these
modes to understand filesystem fragmentation/after-GC condition well,
and eventually get some insights to handle them better.
In "fragment:segment", f2fs allocates a new segment in a random
position. With this, we can simulate the after-GC condition.
In "fragment:block", we can scatter block allocation with
"max_fragment_chunk" and "max_fragment_hole" sysfs nodes.
We added some randomness to both chunk and hole size to make
it close to realistic IO pattern. So, in this mode, f2fs will allocate
1..<max_fragment_chunk> blocks in a chunk and make a hole in the
length of 1..<max_fragment_hole> by turns. With this, the newly
allocated blocks will be scattered throughout the whole partition.
Note that "fragment:block" implicitly enables "fragment:segment"
option for more randomness.
Please use these options for your experiments; we strongly
recommend re-formatting the filesystem after using these options.
io_bits=%u Set the bit size of write IO requests. It should be set io_bits=%u Set the bit size of write IO requests. It should be set
with "mode=lfs". with "mode=lfs".
usrquota Enable plain user disk quota accounting. usrquota Enable plain user disk quota accounting.
......
...@@ -1289,6 +1289,8 @@ enum { ...@@ -1289,6 +1289,8 @@ enum {
enum { enum {
FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */
FS_MODE_LFS, /* use lfs allocation only */ FS_MODE_LFS, /* use lfs allocation only */
FS_MODE_FRAGMENT_SEG, /* segment fragmentation mode */
FS_MODE_FRAGMENT_BLK, /* block fragmentation mode */
}; };
enum { enum {
...@@ -1759,6 +1761,9 @@ struct f2fs_sb_info { ...@@ -1759,6 +1761,9 @@ struct f2fs_sb_info {
unsigned long seq_file_ra_mul; /* multiplier for ra_pages of seq. files in fadvise */ unsigned long seq_file_ra_mul; /* multiplier for ra_pages of seq. files in fadvise */
int max_fragment_chunk; /* max chunk size for block fragmentation mode */
int max_fragment_hole; /* max hole size for block fragmentation mode */
#ifdef CONFIG_F2FS_FS_COMPRESSION #ifdef CONFIG_F2FS_FS_COMPRESSION
struct kmem_cache *page_array_slab; /* page array entry */ struct kmem_cache *page_array_slab; /* page array entry */
unsigned int page_array_slab_size; /* default page array slab size */ unsigned int page_array_slab_size; /* default page array slab size */
...@@ -3519,6 +3524,16 @@ unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, ...@@ -3519,6 +3524,16 @@ unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
unsigned int segno); unsigned int segno);
#define DEF_FRAGMENT_SIZE 4
#define MIN_FRAGMENT_SIZE 1
#define MAX_FRAGMENT_SIZE 512
/*
 * Return true when the configured fs_mode is one of the two fragmentation
 * modes (FS_MODE_FRAGMENT_SEG or FS_MODE_FRAGMENT_BLK), false otherwise.
 */
static inline bool f2fs_need_rand_seg(struct f2fs_sb_info *sbi)
{
	switch (F2FS_OPTION(sbi).fs_mode) {
	case FS_MODE_FRAGMENT_SEG:
	case FS_MODE_FRAGMENT_BLK:
		return true;
	default:
		return false;
	}
}
/* /*
* checkpoint.c * checkpoint.c
*/ */
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/freezer.h> #include <linux/freezer.h>
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include <linux/random.h>
#include "f2fs.h" #include "f2fs.h"
#include "node.h" #include "node.h"
...@@ -257,7 +258,9 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, ...@@ -257,7 +258,9 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->max_search = sbi->max_victim_search; p->max_search = sbi->max_victim_search;
/* let's select beginning hot/small space first in no_heap mode*/ /* let's select beginning hot/small space first in no_heap mode*/
if (test_opt(sbi, NOHEAP) && if (f2fs_need_rand_seg(sbi))
p->offset = prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec);
else if (test_opt(sbi, NOHEAP) &&
(type == CURSEG_HOT_DATA || IS_NODESEG(type))) (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
p->offset = 0; p->offset = 0;
else else
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <linux/timer.h> #include <linux/timer.h>
#include <linux/freezer.h> #include <linux/freezer.h>
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include <linux/random.h>
#include "f2fs.h" #include "f2fs.h"
#include "segment.h" #include "segment.h"
...@@ -2649,6 +2650,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) ...@@ -2649,6 +2650,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
unsigned short seg_type = curseg->seg_type; unsigned short seg_type = curseg->seg_type;
sanity_check_seg_type(sbi, seg_type); sanity_check_seg_type(sbi, seg_type);
if (f2fs_need_rand_seg(sbi))
return prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec);
/* if segs_per_sec is large than 1, we need to keep original policy. */ /* if segs_per_sec is large than 1, we need to keep original policy. */
if (__is_large_section(sbi)) if (__is_large_section(sbi))
...@@ -2700,6 +2703,9 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) ...@@ -2700,6 +2703,9 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
curseg->next_segno = segno; curseg->next_segno = segno;
reset_curseg(sbi, type, 1); reset_curseg(sbi, type, 1);
curseg->alloc_type = LFS; curseg->alloc_type = LFS;
if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
curseg->fragment_remained_chunk =
prandom_u32() % sbi->max_fragment_chunk + 1;
} }
static int __next_free_blkoff(struct f2fs_sb_info *sbi, static int __next_free_blkoff(struct f2fs_sb_info *sbi,
...@@ -2726,12 +2732,22 @@ static int __next_free_blkoff(struct f2fs_sb_info *sbi, ...@@ -2726,12 +2732,22 @@ static int __next_free_blkoff(struct f2fs_sb_info *sbi,
static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
struct curseg_info *seg) struct curseg_info *seg)
{ {
if (seg->alloc_type == SSR) if (seg->alloc_type == SSR) {
seg->next_blkoff = seg->next_blkoff =
__next_free_blkoff(sbi, seg->segno, __next_free_blkoff(sbi, seg->segno,
seg->next_blkoff + 1); seg->next_blkoff + 1);
else } else {
seg->next_blkoff++; seg->next_blkoff++;
if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) {
/* To allocate block chunks in different sizes, use random number */
if (--seg->fragment_remained_chunk <= 0) {
seg->fragment_remained_chunk =
prandom_u32() % sbi->max_fragment_chunk + 1;
seg->next_blkoff +=
prandom_u32() % sbi->max_fragment_hole + 1;
}
}
}
} }
bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
......
...@@ -314,6 +314,7 @@ struct curseg_info { ...@@ -314,6 +314,7 @@ struct curseg_info {
unsigned short next_blkoff; /* next block offset to write */ unsigned short next_blkoff; /* next block offset to write */
unsigned int zone; /* current zone number */ unsigned int zone; /* current zone number */
unsigned int next_segno; /* preallocated segment */ unsigned int next_segno; /* preallocated segment */
int fragment_remained_chunk; /* remained block size in a chunk for block fragmentation mode */
bool inited; /* indicate inmem log is inited */ bool inited; /* indicate inmem log is inited */
}; };
......
...@@ -817,6 +817,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) ...@@ -817,6 +817,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
} else if (!strcmp(name, "lfs")) { } else if (!strcmp(name, "lfs")) {
F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
} else if (!strcmp(name, "fragment:segment")) {
F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_SEG;
} else if (!strcmp(name, "fragment:block")) {
F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_BLK;
} else { } else {
kfree(name); kfree(name);
return -EINVAL; return -EINVAL;
...@@ -1896,6 +1900,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) ...@@ -1896,6 +1900,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, "adaptive"); seq_puts(seq, "adaptive");
else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS) else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS)
seq_puts(seq, "lfs"); seq_puts(seq, "lfs");
else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_SEG)
seq_puts(seq, "fragment:segment");
else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
seq_puts(seq, "fragment:block");
seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs); seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs);
if (test_opt(sbi, RESERVE_ROOT)) if (test_opt(sbi, RESERVE_ROOT))
seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u", seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u",
...@@ -3523,6 +3531,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) ...@@ -3523,6 +3531,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
sbi->migration_granularity = sbi->segs_per_sec; sbi->migration_granularity = sbi->segs_per_sec;
sbi->seq_file_ra_mul = MIN_RA_MUL; sbi->seq_file_ra_mul = MIN_RA_MUL;
sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
sbi->dir_level = DEF_DIR_LEVEL; sbi->dir_level = DEF_DIR_LEVEL;
sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
......
...@@ -551,6 +551,22 @@ static ssize_t __sbi_store(struct f2fs_attr *a, ...@@ -551,6 +551,22 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
return count; return count;
} }
if (!strcmp(a->attr.name, "max_fragment_chunk")) {
if (t >= MIN_FRAGMENT_SIZE && t <= MAX_FRAGMENT_SIZE)
sbi->max_fragment_chunk = t;
else
return -EINVAL;
return count;
}
if (!strcmp(a->attr.name, "max_fragment_hole")) {
if (t >= MIN_FRAGMENT_SIZE && t <= MAX_FRAGMENT_SIZE)
sbi->max_fragment_hole = t;
else
return -EINVAL;
return count;
}
*ui = (unsigned int)t; *ui = (unsigned int)t;
return count; return count;
...@@ -781,6 +797,8 @@ F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_threshold, age_threshold); ...@@ -781,6 +797,8 @@ F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_threshold, age_threshold);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, seq_file_ra_mul, seq_file_ra_mul); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, seq_file_ra_mul, seq_file_ra_mul);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_segment_mode, gc_segment_mode); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_segment_mode, gc_segment_mode);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_chunk, max_fragment_chunk);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_hole, max_fragment_hole);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr) #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = { static struct attribute *f2fs_attrs[] = {
...@@ -859,6 +877,8 @@ static struct attribute *f2fs_attrs[] = { ...@@ -859,6 +877,8 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(seq_file_ra_mul), ATTR_LIST(seq_file_ra_mul),
ATTR_LIST(gc_segment_mode), ATTR_LIST(gc_segment_mode),
ATTR_LIST(gc_reclaimed_segments), ATTR_LIST(gc_reclaimed_segments),
ATTR_LIST(max_fragment_chunk),
ATTR_LIST(max_fragment_hole),
NULL, NULL,
}; };
ATTRIBUTE_GROUPS(f2fs); ATTRIBUTE_GROUPS(f2fs);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment