Commit fe6f0ed0 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'f2fs-for-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "In this round, we've tuned f2fs to improve general performance by
  serializing block allocation and enhancing discard flows like fstrim
  which avoids user IO contention. And we've added fsync_mode=nobarrier
  which gives an option to user where it skips issuing cache_flush
  commands to underlying flash storage. And there are many bug fixes
  related to fuzzed images, revoked atomic writes, quota ops, and minor
  direct IO.

  Enhancements:
   - add fsync_mode=nobarrier which bypasses cache_flush command
   - enhance the discarding flow which avoids user IOs and issues in
     LBA order
   - readahead some encrypted blocks during GC
   - enable in-memory inode checksum to verify the blocks if
     F2FS_CHECK_FS is set
   - enhance nat_bits behavior
   - set -o discard by default
   - set REQ_RAHEAD to bio in ->readpages

  Bug fixes:
   - fix a corner case to corrupt atomic_writes revoking flow
   - revisit i_gc_rwsem to fix race conditions
   - fix some dio behaviors captured by xfstests
   - correct handling errors given by quota-related failures
   - add many sanity check flows to avoid fuzz test failures
   - add more error number propagation to their callers
   - fix several corner cases to continue fault injection w/ shutdown
     loop"

* tag 'f2fs-for-4.19' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (89 commits)
  f2fs: readahead encrypted block during GC
  f2fs: avoid fi->i_gc_rwsem[WRITE] lock in f2fs_gc
  f2fs: fix performance issue observed with multi-thread sequential read
  f2fs: fix to skip verifying block address for non-regular inode
  f2fs: rework fault injection handling to avoid a warning
  f2fs: support fault_type mount option
  f2fs: fix to return success when trimming meta area
  f2fs: fix use-after-free of dicard command entry
  f2fs: support discard submission error injection
  f2fs: split discard command in prior to block layer
  f2fs: wake up gc thread immediately when gc_urgent is set
  f2fs: fix incorrect range->len in f2fs_trim_fs()
  f2fs: refresh recent accessed nat entry in lru list
  f2fs: fix avoid race between truncate and background GC
  f2fs: avoid race between zero_range and background GC
  f2fs: fix to do sanity check with block address in main area v2
  f2fs: fix to do sanity check with inline flags
  f2fs: fix to reset i_gc_failures correctly
  f2fs: fix invalid memory access
  f2fs: fix to avoid broken of dnode block list
  ...
parents 6faf05c2 6aa58d8a
......@@ -51,6 +51,14 @@ Description:
Controls the dirty page count condition for the in-place-update
policies.
What: /sys/fs/f2fs/<disk>/min_seq_blocks
Date: August 2018
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Controls the dirty page count condition for batched sequential
writes in ->writepages.
What: /sys/fs/f2fs/<disk>/min_hot_blocks
Date: March 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
......
......@@ -157,6 +157,24 @@ data_flush Enable data flushing before checkpoint in order to
persist data of regular and symlink.
fault_injection=%d Enable fault injection in all supported types with
specified injection rate.
fault_type=%d Support configuring fault injection type, should be
enabled with fault_injection option, fault type value
is shown below, it supports single or combined type.
Type_Name Type_Value
FAULT_KMALLOC 0x000000001
FAULT_KVMALLOC 0x000000002
FAULT_PAGE_ALLOC 0x000000004
FAULT_PAGE_GET 0x000000008
FAULT_ALLOC_BIO 0x000000010
FAULT_ALLOC_NID 0x000000020
FAULT_ORPHAN 0x000000040
FAULT_BLOCK 0x000000080
FAULT_DIR_DEPTH 0x000000100
FAULT_EVICT_INODE 0x000000200
FAULT_TRUNCATE 0x000000400
FAULT_IO 0x000000800
FAULT_CHECKPOINT 0x000001000
FAULT_DISCARD 0x000002000
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
writes towards main area.
......
This diff is collapsed.
This diff is collapsed.
......@@ -215,7 +215,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->base_mem += sizeof(struct f2fs_nm_info);
si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
si->base_mem += NM_I(sbi)->nat_blocks *
f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK);
si->base_mem += NM_I(sbi)->nat_blocks / 8;
si->base_mem += NM_I(sbi)->nat_blocks * sizeof(unsigned short);
......
......@@ -517,12 +517,11 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
}
start:
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) {
f2fs_show_injection_info(FAULT_DIR_DEPTH);
return -ENOSPC;
}
#endif
if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
return -ENOSPC;
......
This diff is collapsed.
This diff is collapsed.
......@@ -53,12 +53,10 @@ static int gc_thread_func(void *data)
continue;
}
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
#endif
if (!sb_start_write_trylock(sbi->sb))
continue;
......@@ -517,7 +515,11 @@ static void gc_node_segment(struct f2fs_sb_info *sbi,
continue;
}
f2fs_get_node_info(sbi, nid, &ni);
if (f2fs_get_node_info(sbi, nid, &ni)) {
f2fs_put_page(node_page, 1);
continue;
}
if (ni.blk_addr != start_addr + off) {
f2fs_put_page(node_page, 1);
continue;
......@@ -576,7 +578,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (IS_ERR(node_page))
return false;
f2fs_get_node_info(sbi, nid, dni);
if (f2fs_get_node_info(sbi, nid, dni)) {
f2fs_put_page(node_page, 1);
return false;
}
if (sum->version != dni->version) {
f2fs_msg(sbi->sb, KERN_WARNING,
......@@ -594,6 +599,72 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return true;
}
static int ra_data_block(struct inode *inode, pgoff_t index)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct address_space *mapping = inode->i_mapping;
struct dnode_of_data dn;
struct page *page;
struct extent_info ei = {0, 0, 0};
struct f2fs_io_info fio = {
.sbi = sbi,
.ino = inode->i_ino,
.type = DATA,
.temp = COLD,
.op = REQ_OP_READ,
.op_flags = 0,
.encrypted_page = NULL,
.in_list = false,
.retry = false,
};
int err;
page = f2fs_grab_cache_page(mapping, index, true);
if (!page)
return -ENOMEM;
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
dn.data_blkaddr = ei.blk + index - ei.fofs;
goto got_it;
}
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err)
goto put_page;
f2fs_put_dnode(&dn);
if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
DATA_GENERIC))) {
err = -EFAULT;
goto put_page;
}
got_it:
/* read page */
fio.page = page;
fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(sbi),
dn.data_blkaddr,
FGP_LOCK | FGP_CREAT, GFP_NOFS);
if (!fio.encrypted_page) {
err = -ENOMEM;
goto put_page;
}
err = f2fs_submit_page_bio(&fio);
if (err)
goto put_encrypted_page;
f2fs_put_page(fio.encrypted_page, 0);
f2fs_put_page(page, 1);
return 0;
put_encrypted_page:
f2fs_put_page(fio.encrypted_page, 1);
put_page:
f2fs_put_page(page, 1);
return err;
}
/*
* Move data block via META_MAPPING while keeping locked data page.
* This can be used to move blocks, aka LBAs, directly on disk.
......@@ -615,7 +686,7 @@ static void move_data_block(struct inode *inode, block_t bidx,
struct dnode_of_data dn;
struct f2fs_summary sum;
struct node_info ni;
struct page *page;
struct page *page, *mpage;
block_t newaddr;
int err;
bool lfs_mode = test_opt(fio.sbi, LFS);
......@@ -655,7 +726,10 @@ static void move_data_block(struct inode *inode, block_t bidx,
*/
f2fs_wait_on_page_writeback(page, DATA, true);
f2fs_get_node_info(fio.sbi, dn.nid, &ni);
err = f2fs_get_node_info(fio.sbi, dn.nid, &ni);
if (err)
goto put_out;
set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
/* read page */
......@@ -675,6 +749,23 @@ static void move_data_block(struct inode *inode, block_t bidx,
goto recover_block;
}
mpage = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
fio.old_blkaddr, FGP_LOCK, GFP_NOFS);
if (mpage) {
bool updated = false;
if (PageUptodate(mpage)) {
memcpy(page_address(fio.encrypted_page),
page_address(mpage), PAGE_SIZE);
updated = true;
}
f2fs_put_page(mpage, 1);
invalidate_mapping_pages(META_MAPPING(fio.sbi),
fio.old_blkaddr, fio.old_blkaddr);
if (updated)
goto write_page;
}
err = f2fs_submit_page_bio(&fio);
if (err)
goto put_page_out;
......@@ -691,6 +782,7 @@ static void move_data_block(struct inode *inode, block_t bidx,
goto put_page_out;
}
write_page:
set_page_dirty(fio.encrypted_page);
f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
if (clear_page_dirty_for_io(fio.encrypted_page))
......@@ -865,22 +957,30 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (IS_ERR(inode) || is_bad_inode(inode))
continue;
/* if inode uses special I/O path, let's go phase 3 */
if (f2fs_post_read_required(inode)) {
add_gc_inode(gc_list, inode);
if (!down_write_trylock(
&F2FS_I(inode)->i_gc_rwsem[WRITE])) {
iput(inode);
sbi->skipped_gc_rwsem++;
continue;
}
if (!down_write_trylock(
&F2FS_I(inode)->i_gc_rwsem[WRITE])) {
start_bidx = f2fs_start_bidx_of_node(nofs, inode) +
ofs_in_node;
if (f2fs_post_read_required(inode)) {
int err = ra_data_block(inode, start_bidx);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (err) {
iput(inode);
continue;
}
add_gc_inode(gc_list, inode);
continue;
}
start_bidx = f2fs_start_bidx_of_node(nofs, inode);
data_page = f2fs_get_read_data_page(inode,
start_bidx + ofs_in_node, REQ_RAHEAD,
true);
start_bidx, REQ_RAHEAD, true);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (IS_ERR(data_page)) {
iput(inode);
......@@ -903,6 +1003,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
continue;
if (!down_write_trylock(
&fi->i_gc_rwsem[WRITE])) {
sbi->skipped_gc_rwsem++;
up_write(&fi->i_gc_rwsem[READ]);
continue;
}
......@@ -986,7 +1087,13 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
goto next;
sum = page_address(sum_page);
f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));
if (type != GET_SUM_TYPE((&sum->footer))) {
f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent segment (%u) "
"type [%d, %d] in SSA and SIT",
segno, type, GET_SUM_TYPE((&sum->footer)));
set_sbi_flag(sbi, SBI_NEED_FSCK);
goto next;
}
/*
* this is to avoid deadlock:
......@@ -1034,6 +1141,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
unsigned long long last_skipped = sbi->skipped_atomic_files[FG_GC];
unsigned long long first_skipped;
unsigned int skipped_round = 0, round = 0;
trace_f2fs_gc_begin(sbi->sb, sync, background,
......@@ -1046,6 +1154,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
prefree_segments(sbi));
cpc.reason = __get_cp_reason(sbi);
sbi->skipped_gc_rwsem = 0;
first_skipped = last_skipped;
gc_more:
if (unlikely(!(sbi->sb->s_flags & SB_ACTIVE))) {
ret = -EINVAL;
......@@ -1087,7 +1197,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
total_freed += seg_freed;
if (gc_type == FG_GC) {
if (sbi->skipped_atomic_files[FG_GC] > last_skipped)
if (sbi->skipped_atomic_files[FG_GC] > last_skipped ||
sbi->skipped_gc_rwsem)
skipped_round++;
last_skipped = sbi->skipped_atomic_files[FG_GC];
round++;
......@@ -1096,15 +1207,23 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;
if (!sync) {
if (sync)
goto stop;
if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
if (skipped_round > MAX_SKIP_ATOMIC_COUNT &&
skipped_round * 2 >= round)
f2fs_drop_inmem_pages_all(sbi, true);
if (skipped_round <= MAX_SKIP_GC_COUNT ||
skipped_round * 2 < round) {
segno = NULL_SEGNO;
goto gc_more;
}
if (first_skipped < last_skipped &&
(last_skipped - first_skipped) >
sbi->skipped_gc_rwsem) {
f2fs_drop_inmem_pages_all(sbi, true);
segno = NULL_SEGNO;
goto gc_more;
}
if (gc_type == FG_GC)
ret = f2fs_write_checkpoint(sbi, &cpc);
}
......
......@@ -121,6 +121,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
.encrypted_page = NULL,
.io_type = FS_DATA_IO,
};
struct node_info ni;
int dirty, err;
if (!f2fs_exist_data(dn->inode))
......@@ -130,6 +131,24 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
if (err)
return err;
err = f2fs_get_node_info(fio.sbi, dn->nid, &ni);
if (err) {
f2fs_put_dnode(dn);
return err;
}
fio.version = ni.version;
if (unlikely(dn->data_blkaddr != NEW_ADDR)) {
f2fs_put_dnode(dn);
set_sbi_flag(fio.sbi, SBI_NEED_FSCK);
f2fs_msg(fio.sbi->sb, KERN_WARNING,
"%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
"run fsck to fix.",
__func__, dn->inode->i_ino, dn->data_blkaddr);
return -EINVAL;
}
f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));
f2fs_do_read_inline_data(page, dn->inode_page);
......@@ -363,6 +382,17 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
if (err)
goto out;
if (unlikely(dn.data_blkaddr != NEW_ADDR)) {
f2fs_put_dnode(&dn);
set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK);
f2fs_msg(F2FS_P_SB(page)->sb, KERN_WARNING,
"%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
"run fsck to fix.",
__func__, dir->i_ino, dn.data_blkaddr);
err = -EINVAL;
goto out;
}
f2fs_wait_on_page_writeback(page, DATA, true);
dentry_blk = page_address(page);
......@@ -477,6 +507,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
return 0;
recover:
lock_page(ipage);
f2fs_wait_on_page_writeback(ipage, NODE, true);
memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir));
f2fs_i_depth_write(dir, 0);
f2fs_i_size_write(dir, MAX_INLINE_DATA(dir));
......@@ -668,7 +699,10 @@ int f2fs_inline_data_fiemap(struct inode *inode,
ilen = start + len;
ilen -= start;
f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni);
if (err)
goto out;
byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits;
byteaddr += (char *)inline_data_addr(inode, ipage) -
(char *)F2FS_INODE(ipage);
......
......@@ -68,13 +68,16 @@ static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
}
}
static bool __written_first_block(struct f2fs_inode *ri)
static int __written_first_block(struct f2fs_sb_info *sbi,
struct f2fs_inode *ri)
{
block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]);
if (is_valid_blkaddr(addr))
return true;
return false;
if (!__is_valid_data_blkaddr(addr))
return 1;
if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC))
return -EFAULT;
return 0;
}
static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
......@@ -121,7 +124,7 @@ static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page
if (!f2fs_sb_has_inode_chksum(sbi->sb))
return false;
if (!RAW_IS_INODE(F2FS_NODE(page)) || !(ri->i_inline & F2FS_EXTRA_ATTR))
if (!IS_INODE(page) || !(ri->i_inline & F2FS_EXTRA_ATTR))
return false;
if (!F2FS_FITS_IN_INODE(ri, le16_to_cpu(ri->i_extra_isize),
......@@ -159,8 +162,15 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page)
struct f2fs_inode *ri;
__u32 provided, calculated;
if (unlikely(is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)))
return true;
#ifdef CONFIG_F2FS_CHECK_FS
if (!f2fs_enable_inode_chksum(sbi, page))
#else
if (!f2fs_enable_inode_chksum(sbi, page) ||
PageDirty(page) || PageWriteback(page))
#endif
return true;
ri = &F2FS_NODE(page)->i;
......@@ -185,9 +195,31 @@ void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
ri->i_inode_checksum = cpu_to_le32(f2fs_inode_chksum(sbi, page));
}
static bool sanity_check_inode(struct inode *inode)
static bool sanity_check_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
unsigned long long iblocks;
iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
if (!iblocks) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, "
"run fsck to fix.",
__func__, inode->i_ino, iblocks);
return false;
}
if (ino_of_node(node_page) != nid_of_node(node_page)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: corrupted inode footer i_ino=%lx, ino,nid: "
"[%u, %u] run fsck to fix.",
__func__, inode->i_ino,
ino_of_node(node_page), nid_of_node(node_page));
return false;
}
if (f2fs_sb_has_flexible_inline_xattr(sbi->sb)
&& !f2fs_has_extra_attr(inode)) {
......@@ -197,6 +229,64 @@ static bool sanity_check_inode(struct inode *inode)
__func__, inode->i_ino);
return false;
}
if (f2fs_has_extra_attr(inode) &&
!f2fs_sb_has_extra_attr(sbi->sb)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx) is with extra_attr, "
"but extra_attr feature is off",
__func__, inode->i_ino);
return false;
}
if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
fi->i_extra_isize % sizeof(__le32)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx) has corrupted i_extra_isize: %d, "
"max: %zu",
__func__, inode->i_ino, fi->i_extra_isize,
F2FS_TOTAL_EXTRA_ATTR_SIZE);
return false;
}
if (F2FS_I(inode)->extent_tree) {
struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
if (ei->len &&
(!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC) ||
!f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
DATA_GENERIC))) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx) extent info [%u, %u, %u] "
"is incorrect, run fsck to fix",
__func__, inode->i_ino,
ei->blk, ei->fofs, ei->len);
return false;
}
}
if (f2fs_has_inline_data(inode) &&
(!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx, mode=%u) should not have "
"inline_data, run fsck to fix",
__func__, inode->i_ino, inode->i_mode);
return false;
}
if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx, mode=%u) should not have "
"inline_dentry, run fsck to fix",
__func__, inode->i_ino, inode->i_mode);
return false;
}
return true;
}
......@@ -207,6 +297,7 @@ static int do_read_inode(struct inode *inode)
struct page *node_page;
struct f2fs_inode *ri;
projid_t i_projid;
int err;
/* Check if ino is within scope */
if (f2fs_check_nid_range(sbi, inode->i_ino))
......@@ -268,6 +359,11 @@ static int do_read_inode(struct inode *inode)
fi->i_inline_xattr_size = 0;
}
if (!sanity_check_inode(inode, node_page)) {
f2fs_put_page(node_page, 1);
return -EINVAL;
}
/* check data exist */
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
__recover_inline_status(inode, node_page);
......@@ -275,8 +371,15 @@ static int do_read_inode(struct inode *inode)
/* get rdev by using inline_info */
__get_inode_rdev(inode, ri);
if (__written_first_block(ri))
if (S_ISREG(inode->i_mode)) {
err = __written_first_block(sbi, ri);
if (err < 0) {
f2fs_put_page(node_page, 1);
return err;
}
if (!err)
set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
}
if (!f2fs_need_inode_block_update(sbi, inode->i_ino))
fi->last_disk_size = inode->i_size;
......@@ -297,9 +400,9 @@ static int do_read_inode(struct inode *inode)
fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec);
}
F2FS_I(inode)->i_disk_time[0] = timespec64_to_timespec(inode->i_atime);
F2FS_I(inode)->i_disk_time[1] = timespec64_to_timespec(inode->i_ctime);
F2FS_I(inode)->i_disk_time[2] = timespec64_to_timespec(inode->i_mtime);
F2FS_I(inode)->i_disk_time[0] = inode->i_atime;
F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
f2fs_put_page(node_page, 1);
......@@ -330,10 +433,6 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
ret = do_read_inode(inode);
if (ret)
goto bad_inode;
if (!sanity_check_inode(inode)) {
ret = -EINVAL;
goto bad_inode;
}
make_now:
if (ino == F2FS_NODE_INO(sbi)) {
inode->i_mapping->a_ops = &f2fs_node_aops;
......@@ -470,10 +569,14 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
if (inode->i_nlink == 0)
clear_inline_node(node_page);
F2FS_I(inode)->i_disk_time[0] = timespec64_to_timespec(inode->i_atime);
F2FS_I(inode)->i_disk_time[1] = timespec64_to_timespec(inode->i_ctime);
F2FS_I(inode)->i_disk_time[2] = timespec64_to_timespec(inode->i_mtime);
F2FS_I(inode)->i_disk_time[0] = inode->i_atime;
F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
#ifdef CONFIG_F2FS_CHECK_FS
f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page);
#endif
}
void f2fs_update_inode_page(struct inode *inode)
......@@ -558,12 +661,11 @@ void f2fs_evict_inode(struct inode *inode)
if (F2FS_HAS_BLOCKS(inode))
err = f2fs_truncate(inode);
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
f2fs_show_injection_info(FAULT_EVICT_INODE);
err = -EIO;
}
#endif
if (!err) {
f2fs_lock_op(sbi);
err = f2fs_remove_inode_page(inode);
......@@ -626,6 +728,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct node_info ni;
int err;
/*
* clear nlink of inode in order to release resource of inode
......@@ -648,10 +751,16 @@ void f2fs_handle_failed_inode(struct inode *inode)
* so we can prevent losing this orphan when encoutering checkpoint
* and following suddenly power-off.
*/
f2fs_get_node_info(sbi, inode->i_ino, &ni);
err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"May loss orphan inode, run fsck to fix.");
goto out;
}
if (ni.blk_addr != NULL_ADDR) {
int err = f2fs_acquire_orphan_inode(sbi);
err = f2fs_acquire_orphan_inode(sbi);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
......@@ -664,6 +773,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
set_inode_flag(inode, FI_FREE_NID);
}
out:
f2fs_unlock_op(sbi);
/* iput will drop the inode object */
......
......@@ -51,7 +51,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
inode->i_ino = ino;
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
F2FS_I(inode)->i_crtime = timespec64_to_timespec(inode->i_mtime);
F2FS_I(inode)->i_crtime = inode->i_mtime;
inode->i_generation = sbi->s_next_generation++;
if (S_ISDIR(inode->i_mode))
......@@ -246,7 +246,7 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
return -EINVAL;
if (hot) {
strncpy(extlist[count], name, strlen(name));
memcpy(extlist[count], name, strlen(name));
sbi->raw_super->hot_ext_count = hot_count + 1;
} else {
char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN];
......@@ -254,7 +254,7 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
memcpy(buf, &extlist[cold_count],
F2FS_EXTENSION_LEN * hot_count);
memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN);
strncpy(extlist[cold_count], name, strlen(name));
memcpy(extlist[cold_count], name, strlen(name));
memcpy(&extlist[cold_count + 1], buf,
F2FS_EXTENSION_LEN * hot_count);
sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1);
......
This diff is collapsed.
......@@ -135,6 +135,11 @@ static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD;
}
static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi)
{
return get_pages(sbi, F2FS_DIRTY_NODES) >= sbi->blocks_per_seg * 8;
}
enum mem_type {
FREE_NIDS, /* indicates the free nid list */
NAT_ENTRIES, /* indicates the cached nat entry */
......@@ -444,6 +449,10 @@ static inline void set_mark(struct page *page, int mark, int type)
else
flag &= ~(0x1 << type);
rn->footer.flag = cpu_to_le32(flag);
#ifdef CONFIG_F2FS_CHECK_FS
f2fs_inode_chksum_set(F2FS_P_SB(page), page);
#endif
}
#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT)
#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT)
......@@ -241,7 +241,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
struct page *page = NULL;
block_t blkaddr;
unsigned int loop_cnt = 0;
unsigned int free_blocks = sbi->user_block_count -
unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg -
valid_user_blocks(sbi);
int err = 0;
......@@ -252,10 +252,14 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
while (1) {
struct fsync_inode_entry *entry;
if (!f2fs_is_valid_meta_blkaddr(sbi, blkaddr, META_POR))
if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
return 0;
page = f2fs_get_tmp_page(sbi, blkaddr);
if (IS_ERR(page)) {
err = PTR_ERR(page);
break;
}
if (!is_recoverable_dnode(page))
break;
......@@ -471,7 +475,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
f2fs_get_node_info(sbi, dn.nid, &ni);
err = f2fs_get_node_info(sbi, dn.nid, &ni);
if (err)
goto err;
f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
......@@ -507,14 +514,13 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
}
/* dest is valid block, try to recover from src to dest */
if (f2fs_is_valid_meta_blkaddr(sbi, dest, META_POR)) {
if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
if (src == NULL_ADDR) {
err = f2fs_reserve_new_block(&dn);
#ifdef CONFIG_F2FS_FAULT_INJECTION
while (err)
while (err &&
IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION))
err = f2fs_reserve_new_block(&dn);
#endif
/* We should not get -ENOSPC */
f2fs_bug_on(sbi, err);
if (err)
......@@ -568,12 +574,16 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
while (1) {
struct fsync_inode_entry *entry;
if (!f2fs_is_valid_meta_blkaddr(sbi, blkaddr, META_POR))
if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
break;
f2fs_ra_meta_pages_cond(sbi, blkaddr);
page = f2fs_get_tmp_page(sbi, blkaddr);
if (IS_ERR(page)) {
err = PTR_ERR(page);
break;
}
if (!is_recoverable_dnode(page)) {
f2fs_put_page(page, 1);
......@@ -628,7 +638,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
#endif
if (s_flags & SB_RDONLY) {
f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
f2fs_msg(sbi->sb, KERN_INFO,
"recover fsync data on readonly fs");
sbi->sb->s_flags &= ~SB_RDONLY;
}
......
This diff is collapsed.
......@@ -85,7 +85,7 @@
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1))
#define GET_SEGNO(sbi, blk_addr) \
((!is_valid_blkaddr(blk_addr)) ? \
((!is_valid_data_blkaddr(sbi, blk_addr)) ? \
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define BLKS_PER_SEC(sbi) \
......@@ -215,7 +215,7 @@ struct segment_allocation {
#define IS_DUMMY_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
#define MAX_SKIP_ATOMIC_COUNT 16
#define MAX_SKIP_GC_COUNT 16
struct inmem_pages {
struct list_head list;
......@@ -448,6 +448,8 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
if (test_and_clear_bit(segno, free_i->free_segmap)) {
free_i->free_segments++;
if (IS_CURSEC(sbi, secno))
goto skip_free;
next = find_next_bit(free_i->free_segmap,
start_segno + sbi->segs_per_sec, start_segno);
if (next >= start_segno + sbi->segs_per_sec) {
......@@ -455,6 +457,7 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
free_i->free_sections++;
}
}
skip_free:
spin_unlock(&free_i->segmap_lock);
}
......@@ -645,13 +648,10 @@ static inline void verify_block_addr(struct f2fs_io_info *fio, block_t blk_addr)
{
struct f2fs_sb_info *sbi = fio->sbi;
if (PAGE_TYPE_OF_BIO(fio->type) == META &&
(!is_read_io(fio->op) || fio->is_meta))
BUG_ON(blk_addr < SEG0_BLKADDR(sbi) ||
blk_addr >= MAIN_BLKADDR(sbi));
if (__is_meta_io(fio))
verify_blkaddr(sbi, blk_addr, META_GENERIC);
else
BUG_ON(blk_addr < MAIN_BLKADDR(sbi) ||
blk_addr >= MAX_BLKADDR(sbi));
verify_blkaddr(sbi, blk_addr, DATA_GENERIC);
}
/*
......
This diff is collapsed.
......@@ -9,6 +9,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/compiler.h>
#include <linux/proc_fs.h>
#include <linux/f2fs_fs.h>
#include <linux/seq_file.h>
......@@ -252,6 +253,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
if (t >= 1) {
sbi->gc_mode = GC_URGENT;
if (sbi->gc_thread) {
sbi->gc_thread->gc_wake = 1;
wake_up_interruptible_all(
&sbi->gc_thread->gc_wait_queue_head);
wake_up_discard_thread(sbi, true);
......@@ -286,8 +288,10 @@ static ssize_t f2fs_sbi_store(struct f2fs_attr *a,
bool gc_entry = (!strcmp(a->attr.name, "gc_urgent") ||
a->struct_type == GC_THREAD);
if (gc_entry)
down_read(&sbi->sb->s_umount);
if (gc_entry) {
if (!down_read_trylock(&sbi->sb->s_umount))
return -EAGAIN;
}
ret = __sbi_store(a, sbi, buf, count);
if (gc_entry)
up_read(&sbi->sb->s_umount);
......@@ -393,6 +397,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_seq_blocks, min_seq_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
......@@ -445,6 +450,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ipu_policy),
ATTR_LIST(min_ipu_util),
ATTR_LIST(min_fsync_blocks),
ATTR_LIST(min_seq_blocks),
ATTR_LIST(min_hot_blocks),
ATTR_LIST(min_ssr_sections),
ATTR_LIST(max_victim_search),
......@@ -516,7 +522,8 @@ static struct kobject f2fs_feat = {
.kset = &f2fs_kset,
};
static int segment_info_seq_show(struct seq_file *seq, void *offset)
static int __maybe_unused segment_info_seq_show(struct seq_file *seq,
void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
......@@ -543,7 +550,8 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
static int segment_bits_seq_show(struct seq_file *seq, void *offset)
static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
......@@ -567,7 +575,8 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset)
return 0;
}
static int iostat_info_seq_show(struct seq_file *seq, void *offset)
static int __maybe_unused iostat_info_seq_show(struct seq_file *seq,
void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
......@@ -609,6 +618,28 @@ static int iostat_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
static int __maybe_unused victim_bits_seq_show(struct seq_file *seq,
void *offset)
{
struct super_block *sb = seq->private;
struct f2fs_sb_info *sbi = F2FS_SB(sb);
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
int i;
seq_puts(seq, "format: victim_secmap bitmaps\n");
for (i = 0; i < MAIN_SECS(sbi); i++) {
if ((i % 10) == 0)
seq_printf(seq, "%-10d", i);
seq_printf(seq, "%d", test_bit(i, dirty_i->victim_secmap) ? 1 : 0);
if ((i % 10) == 9 || i == (MAIN_SECS(sbi) - 1))
seq_putc(seq, '\n');
else
seq_putc(seq, ' ');
}
return 0;
}
int __init f2fs_init_sysfs(void)
{
int ret;
......@@ -658,6 +689,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
segment_bits_seq_show, sb);
proc_create_single_data("iostat_info", S_IRUGO, sbi->s_proc,
iostat_info_seq_show, sb);
proc_create_single_data("victim_bits", S_IRUGO, sbi->s_proc,
victim_bits_seq_show, sb);
}
return 0;
}
......@@ -668,6 +701,7 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
remove_proc_entry("iostat_info", sbi->s_proc);
remove_proc_entry("segment_info", sbi->s_proc);
remove_proc_entry("segment_bits", sbi->s_proc);
remove_proc_entry("victim_bits", sbi->s_proc);
remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
}
kobject_del(&sbi->s_kobj);
......
......@@ -37,9 +37,6 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
......@@ -62,9 +59,6 @@ static int f2fs_xattr_generic_set(const struct xattr_handler *handler,
return -EOPNOTSUPP;
break;
case F2FS_XATTR_INDEX_TRUSTED:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
break;
case F2FS_XATTR_INDEX_SECURITY:
break;
default:
......@@ -100,12 +94,22 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
const char *name, const void *value,
size_t size, int flags)
{
unsigned char old_advise = F2FS_I(inode)->i_advise;
unsigned char new_advise;
if (!inode_owner_or_capable(inode))
return -EPERM;
if (value == NULL)
return -EINVAL;
F2FS_I(inode)->i_advise |= *(char *)value;
new_advise = *(char *)value;
if (new_advise & ~FADVISE_MODIFIABLE_BITS)
return -EINVAL;
new_advise = new_advise & FADVISE_MODIFIABLE_BITS;
new_advise |= old_advise & ~FADVISE_MODIFIABLE_BITS;
F2FS_I(inode)->i_advise = new_advise;
f2fs_mark_inode_dirty_sync(inode, true);
return 0;
}
......
......@@ -304,11 +304,6 @@ struct f2fs_node {
* For NAT entries
*/
#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
#define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8)
#define NAT_ENTRY_BITMAP_SIZE_ALIGNED \
((NAT_ENTRY_BITMAP_SIZE + BITS_PER_LONG - 1) / \
BITS_PER_LONG * BITS_PER_LONG)
struct f2fs_nat_entry {
__u8 version; /* latest version of cached nat entry */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment