Commit c426d991 authored by Shin'ichiro Kawasaki, committed by Jaegeuk Kim

f2fs: Check write pointer consistency of open zones

On a sudden f2fs shutdown, the write pointers of zoned block devices can advance beyond the positions that the f2fs metadata records for the current segments, since the metadata still holds the positions from before the last write operations. After remounting the f2fs, this inconsistency causes writes that do not land on the write pointers, and the device reports an "Unaligned write command" error.

To avoid the error, during the mount operation compare each current segment with the write pointer of the open zone it points to. If the write pointer position is not aligned with the current segment position, assign a new zone to the current segment. Also check that the newly assigned zone has its write pointer at the zone start; if not, reset the write pointer of the zone.
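
For illustration only (not part of the patch), the following minimal, self-contained C program sketches the alignment condition described above. It assumes a common f2fs geometry of 4 KiB blocks, 2 MiB segments (512 blocks per segment) and 512-byte device sectors; the helper name curseg_aligned_with_wp and the example numbers are invented for this sketch.

#include <stdbool.h>
#include <stdio.h>

#define SECTOR_SHIFT    9       /* 512-byte device sectors (assumed) */
#define LOG_BLOCKSIZE   12      /* 4 KiB f2fs blocks (assumed) */
#define BLKS_PER_SEG    512     /* 2 MiB segments (assumed) */

struct curseg { unsigned int segno, next_blkoff; };

/* Map a zone write pointer (in sectors) to (segno, blkoff) and compare. */
static bool curseg_aligned_with_wp(const struct curseg *cs,
                                   unsigned long long wp_sector,
                                   unsigned long long dev_start_blk)
{
        unsigned int log_sectors_per_block = LOG_BLOCKSIZE - SECTOR_SHIFT;
        unsigned long long wp_block, wp_segno, wp_blkoff, wp_sector_off;

        /* zone write pointer (sectors) -> f2fs block address */
        wp_block = dev_start_blk + (wp_sector >> log_sectors_per_block);
        /* block address -> (segment number, block offset in segment) */
        wp_segno = wp_block / BLKS_PER_SEG;
        wp_blkoff = wp_block % BLKS_PER_SEG;
        /* the write pointer must also sit on a block boundary */
        wp_sector_off = wp_sector & ((1ULL << log_sectors_per_block) - 1);

        return cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
               wp_sector_off == 0;
}

int main(void)
{
        struct curseg cs = { .segno = 100, .next_blkoff = 8 };

        /* wp at sector 409664 = block 51208 = segment 100, offset 8: aligned */
        printf("%d\n", curseg_aligned_with_wp(&cs, 409664, 0));
        /* wp advanced before the crash to offset 24: curseg must be reassigned */
        printf("%d\n", curseg_aligned_with_wp(&cs, 409792, 0));
        return 0;
}

The patch performs the same comparison against cs->segno and cs->next_blkoff after converting zone.wp with log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT (12 - 9 = 3 for 4 KiB blocks).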

Perform the consistency check during fsync recovery. To avoid losing fsync data, do the check after the fsync data has been restored and before the checkpoint commit, which flushes data at the current segment positions. To avoid conflicts with the kworker's dirty data/node flushes, do the fix while SBI_POR_DOING protection is in effect.
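
As a rough, runnable sketch of this ordering (stub functions only, invented for illustration; the real sequence lives in f2fs_recover_fsync_data(), see the recovery.c hunk below):

#include <stdio.h>

static int restore_fsync_data(void)
{
        puts("1. restore fsync data (roll-forward recovery)");
        return 0;
}

static int fix_curseg_write_pointer(void)
{
        puts("2. align current segments with zone write pointers");
        return 0;
}

static void clear_por_doing(void)
{
        puts("3. clear SBI_POR_DOING (kworker data/node flush may run again)");
}

static void write_checkpoint(void)
{
        puts("4. checkpoint commit flushes data at the current segment positions");
}

int main(void)
{
        int err = restore_fsync_data();

        if (!err)
                err = fix_curseg_write_pointer();
        if (!err) {
                clear_por_doing();
                write_checkpoint();
        }
        return err;
}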
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
parent dd973007
...
@@ -3155,6 +3155,7 @@ void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk);
 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
                        unsigned int val, int alloc);
 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
+int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi);
 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi);
 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi);
 int __init f2fs_create_segment_manager_caches(void);
...
@@ -723,6 +723,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
        int ret = 0;
        unsigned long s_flags = sbi->sb->s_flags;
        bool need_writecp = false;
+       bool fix_curseg_write_pointer = false;
 #ifdef CONFIG_QUOTA
        int quota_enabled;
 #endif
...
@@ -774,6 +775,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
                sbi->sb->s_flags = s_flags;
        }
 skip:
+       fix_curseg_write_pointer = !check_only || list_empty(&inode_list);
+
        destroy_fsync_dnodes(&inode_list, err);
        destroy_fsync_dnodes(&tmp_inode_list, err);
...
@@ -784,9 +787,22 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
        if (err) {
                truncate_inode_pages_final(NODE_MAPPING(sbi));
                truncate_inode_pages_final(META_MAPPING(sbi));
-       } else {
-               clear_sbi_flag(sbi, SBI_POR_DOING);
        }
+
+       /*
+        * If fsync data succeeds or there is no fsync data to recover,
+        * and the f2fs is not read only, check and fix zoned block devices'
+        * write pointer consistency.
+        */
+       if (!err && fix_curseg_write_pointer && !f2fs_readonly(sbi->sb) &&
+                       f2fs_sb_has_blkzoned(sbi)) {
+               err = f2fs_fix_curseg_write_pointer(sbi);
+               ret = err;
+       }
+
+       if (!err)
+               clear_sbi_flag(sbi, SBI_POR_DOING);
+
        mutex_unlock(&sbi->cp_mutex);

        /* let's drop all the directory inodes for clean checkpoint */
...
...
@@ -4368,6 +4368,137 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
        return 0;
 }

+#ifdef CONFIG_BLK_DEV_ZONED
+
+static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
+                                               block_t zone_blkaddr)
+{
+       int i;
+
+       for (i = 0; i < sbi->s_ndevs; i++) {
+               if (!bdev_is_zoned(FDEV(i).bdev))
+                       continue;
+               if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
+                               zone_blkaddr <= FDEV(i).end_blk))
+                       return &FDEV(i);
+       }
+
+       return NULL;
+}
+
+static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
+                             void *data) {
+       memcpy(data, zone, sizeof(struct blk_zone));
+       return 0;
+}
+
+static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
+{
+       struct curseg_info *cs = CURSEG_I(sbi, type);
+       struct f2fs_dev_info *zbd;
+       struct blk_zone zone;
+       unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
+       block_t cs_zone_block, wp_block;
+       unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
+       sector_t zone_sector;
+       int err;
+
+       cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
+       cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
+
+       zbd = get_target_zoned_dev(sbi, cs_zone_block);
+       if (!zbd)
+               return 0;
+
+       /* report zone for the sector the curseg points to */
+       zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
+               << log_sectors_per_block;
+       err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
+                                 report_one_zone_cb, &zone);
+       if (err != 1) {
+               f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
+                        zbd->path, err);
+               return err;
+       }
+
+       if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
+               return 0;
+
+       wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
+       wp_segno = GET_SEGNO(sbi, wp_block);
+       wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
+       wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
+
+       if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
+               wp_sector_off == 0)
+               return 0;
+
+       f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
+                   "curseg[0x%x,0x%x] wp[0x%x,0x%x]",
+                   type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff);
+
+       f2fs_notice(sbi, "Assign new section to curseg[%d]: "
+                   "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
+       allocate_segment_by_default(sbi, type, true);
+
+       /* check newly assigned zone */
+       cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
+       cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
+
+       zbd = get_target_zoned_dev(sbi, cs_zone_block);
+       if (!zbd)
+               return 0;
+
+       zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
+               << log_sectors_per_block;
+       err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
+                                 report_one_zone_cb, &zone);
+       if (err != 1) {
+               f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
+                        zbd->path, err);
+               return err;
+       }
+
+       if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
+               return 0;
+
+       if (zone.wp != zone.start) {
+               f2fs_notice(sbi,
+                           "New zone for curseg[%d] is not yet discarded. "
+                           "Reset the zone: curseg[0x%x,0x%x]",
+                           type, cs->segno, cs->next_blkoff);
+               err = __f2fs_issue_discard_zone(sbi, zbd->bdev,
+                               zone_sector >> log_sectors_per_block,
+                               zone.len >> log_sectors_per_block);
+               if (err) {
+                       f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
+                                zbd->path, err);
+                       return err;
+               }
+       }
+
+       return 0;
+}
+
+int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
+{
+       int i, ret;
+
+       for (i = 0; i < NO_CHECK_TYPE; i++) {
+               ret = fix_curseg_write_pointer(sbi, i);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+#else
+int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
+{
+       return 0;
+}
+#endif
+
 /*
  * Update min, max modified time for cost-benefit GC algorithm
  */
...