Commit 1501f707 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'f2fs-for-5.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "In this round, we've refactored the existing atomic write support
  implemented by in-memory operations to have storing data in disk
  temporarily, which can give us a benefit to accept more atomic writes.

  At the same time, we removed the existing volatile write support.

  We've also revisited the file pinning and GC flows and found some
  corner cases which contributeed abnormal system behaviours.

  As usual, there're several minor code refactoring for readability,
  sanity check, and clean ups.

  Enhancements:

   - allow compression for mmap files in compress_mode=user

   - kill volatile write support

   - change the current atomic write way

   - give priority to select unpinned section for foreground GC

   - introduce data read/write showing path info

   - remove unnecessary f2fs_lock_op in f2fs_new_inode

  Bug fixes:

   - fix the file pinning flow during checkpoint=disable and GCs

   - fix foreground and background GCs to select the right victims and
     get free sections on time

   - fix GC flags on defragmenting pages

   - avoid an infinite loop to flush node pages

   - fix fallocate to use file_modified to update permissions
     consistently"

* tag 'f2fs-for-5.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (40 commits)
  f2fs: fix to tag gcing flag on page during file defragment
  f2fs: replace F2FS_I(inode) and sbi by the local variable
  f2fs: add f2fs_init_write_merge_io function
  f2fs: avoid unneeded error handling for revoke_entry_slab allocation
  f2fs: allow compression for mmap files in compress_mode=user
  f2fs: fix typo in comment
  f2fs: make f2fs_read_inline_data() more readable
  f2fs: fix to do sanity check for inline inode
  f2fs: fix fallocate to use file_modified to update permissions consistently
  f2fs: don't use casefolded comparison for "." and ".."
  f2fs: do not stop GC when requiring a free section
  f2fs: keep wait_ms if EAGAIN happens
  f2fs: introduce f2fs_gc_control to consolidate f2fs_gc parameters
  f2fs: reject test_dummy_encryption when !CONFIG_FS_ENCRYPTION
  f2fs: kill volatile write support
  f2fs: change the current atomic write way
  f2fs: don't need inode lock for system hidden quota
  f2fs: stop allocating pinned sections if EAGAIN happens
  f2fs: skip GC if possible when checkpoint disabling
  f2fs: give priority to select unpinned section for foreground GC
  ...
parents 2a5699b0 2d1fe8a8
......@@ -98,13 +98,7 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
}
if (unlikely(!PageUptodate(page))) {
if (page->index == sbi->metapage_eio_ofs) {
if (sbi->metapage_eio_cnt++ == MAX_RETRY_META_PAGE_EIO)
set_ckpt_flags(sbi, CP_ERROR_FLAG);
} else {
sbi->metapage_eio_ofs = page->index;
sbi->metapage_eio_cnt = 0;
}
f2fs_handle_page_eio(sbi, page->index, META);
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
......@@ -158,7 +152,7 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
blkaddr, exist);
set_sbi_flag(sbi, SBI_NEED_FSCK);
WARN_ON(1);
dump_stack();
}
return exist;
}
......@@ -196,7 +190,7 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
f2fs_warn(sbi, "access invalid blkaddr:%u",
blkaddr);
set_sbi_flag(sbi, SBI_NEED_FSCK);
WARN_ON(1);
dump_stack();
return false;
} else {
return __is_bitmap_valid(sbi, blkaddr, type);
......@@ -1010,9 +1004,7 @@ static void __add_dirty_inode(struct inode *inode, enum inode_type type)
return;
set_inode_flag(inode, flag);
if (!f2fs_is_volatile_file(inode))
list_add_tail(&F2FS_I(inode)->dirty_list,
&sbi->inode_list[type]);
list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]);
stat_inc_dirty_inode(sbi, type);
}
......
......@@ -69,8 +69,7 @@ static bool __is_cp_guaranteed(struct page *page)
if (f2fs_is_compressed_page(page))
return false;
if ((S_ISREG(inode->i_mode) &&
(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
page_private_gcing(page))
return true;
return false;
......@@ -585,6 +584,34 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode,
return false;
}
int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
{
int i;
for (i = 0; i < NR_PAGE_TYPE; i++) {
int n = (i == META) ? 1 : NR_TEMP_TYPE;
int j;
sbi->write_io[i] = f2fs_kmalloc(sbi,
array_size(n, sizeof(struct f2fs_bio_info)),
GFP_KERNEL);
if (!sbi->write_io[i])
return -ENOMEM;
for (j = HOT; j < n; j++) {
init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem);
sbi->write_io[i][j].sbi = sbi;
sbi->write_io[i][j].bio = NULL;
spin_lock_init(&sbi->write_io[i][j].io_lock);
INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
}
}
return 0;
}
static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
enum page_type type, enum temp_type temp)
{
......@@ -2564,7 +2591,12 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
bool ipu_force = false;
int err = 0;
/* Use COW inode to make dnode_of_data for atomic write */
if (f2fs_is_atomic_file(inode))
set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0);
else
set_new_dnode(&dn, inode, NULL, NULL, 0);
if (need_inplace_update(fio) &&
f2fs_lookup_extent_cache(inode, page->index, &ei)) {
fio->old_blkaddr = ei.blk + page->index - ei.fofs;
......@@ -2601,6 +2633,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
err = -EFSCORRUPTED;
goto out_writepage;
}
/*
* If current allocation needs SSR,
* it had better in-place writes for updated data.
......@@ -2737,11 +2770,6 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
write:
if (f2fs_is_drop_cache(inode))
goto out;
/* we should not write 0'th page having journal header */
if (f2fs_is_volatile_file(inode) && (!page->index ||
(!wbc->for_reclaim &&
f2fs_available_free_memory(sbi, BASE_CHECK))))
goto redirty_out;
/* Dentry/quota blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
......@@ -3314,6 +3342,100 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
return err;
}
static int __find_data_block(struct inode *inode, pgoff_t index,
block_t *blk_addr)
{
struct dnode_of_data dn;
struct page *ipage;
struct extent_info ei = {0, };
int err = 0;
ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(ipage))
return PTR_ERR(ipage);
set_new_dnode(&dn, inode, ipage, ipage, 0);
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
dn.data_blkaddr = ei.blk + index - ei.fofs;
} else {
/* hole case */
err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err) {
dn.data_blkaddr = NULL_ADDR;
err = 0;
}
}
*blk_addr = dn.data_blkaddr;
f2fs_put_dnode(&dn);
return err;
}
static int __reserve_data_block(struct inode *inode, pgoff_t index,
block_t *blk_addr, bool *node_changed)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct dnode_of_data dn;
struct page *ipage;
int err = 0;
f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
ipage = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(ipage)) {
err = PTR_ERR(ipage);
goto unlock_out;
}
set_new_dnode(&dn, inode, ipage, ipage, 0);
err = f2fs_get_block(&dn, index);
*blk_addr = dn.data_blkaddr;
*node_changed = dn.node_changed;
f2fs_put_dnode(&dn);
unlock_out:
f2fs_do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
return err;
}
static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
struct page *page, loff_t pos, unsigned int len,
block_t *blk_addr, bool *node_changed)
{
struct inode *inode = page->mapping->host;
struct inode *cow_inode = F2FS_I(inode)->cow_inode;
pgoff_t index = page->index;
int err = 0;
block_t ori_blk_addr;
/* If pos is beyond the end of file, reserve a new block in COW inode */
if ((pos & PAGE_MASK) >= i_size_read(inode))
return __reserve_data_block(cow_inode, index, blk_addr,
node_changed);
/* Look for the block in COW inode first */
err = __find_data_block(cow_inode, index, blk_addr);
if (err)
return err;
else if (*blk_addr != NULL_ADDR)
return 0;
/* Look for the block in the original inode */
err = __find_data_block(inode, index, &ori_blk_addr);
if (err)
return err;
/* Finally, we should reserve a new block in COW inode for the update */
err = __reserve_data_block(cow_inode, index, blk_addr, node_changed);
if (err)
return err;
if (ori_blk_addr != NULL_ADDR)
*blk_addr = ori_blk_addr;
return 0;
}
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, struct page **pagep, void **fsdata)
{
......@@ -3321,7 +3443,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *page = NULL;
pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
bool need_balance = false, drop_atomic = false;
bool need_balance = false;
block_t blkaddr = NULL_ADDR;
int err = 0;
......@@ -3332,14 +3454,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
goto fail;
}
if ((f2fs_is_atomic_file(inode) &&
!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
err = -ENOMEM;
drop_atomic = true;
goto fail;
}
/*
* We should check this at this moment to avoid deadlock on inode page
* and #0 page. The locking rule for inline_data conversion should be:
......@@ -3387,6 +3501,10 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
*pagep = page;
if (f2fs_is_atomic_file(inode))
err = prepare_atomic_write_begin(sbi, page, pos, len,
&blkaddr, &need_balance);
else
err = prepare_write_begin(sbi, page, pos, len,
&blkaddr, &need_balance);
if (err)
......@@ -3443,8 +3561,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
fail:
f2fs_put_page(page, 1);
f2fs_write_failed(inode, pos + len);
if (drop_atomic)
f2fs_drop_inmem_pages_all(sbi, false);
return err;
}
......@@ -3488,8 +3604,12 @@ static int f2fs_write_end(struct file *file,
set_page_dirty(page);
if (pos + copied > i_size_read(inode) &&
!f2fs_verity_in_progress(inode))
!f2fs_verity_in_progress(inode)) {
f2fs_i_size_write(inode, pos + copied);
if (f2fs_is_atomic_file(inode))
f2fs_i_size_write(F2FS_I(inode)->cow_inode,
pos + copied);
}
unlock_out:
f2fs_put_page(page, 1);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
......@@ -3522,9 +3642,6 @@ void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
inode->i_ino == F2FS_COMPRESS_INO(sbi))
clear_page_private_data(&folio->page);
if (page_private_atomic(&folio->page))
return f2fs_drop_inmem_page(inode, &folio->page);
folio_detach_private(folio);
}
......@@ -3536,10 +3653,6 @@ bool f2fs_release_folio(struct folio *folio, gfp_t wait)
if (folio_test_dirty(folio))
return false;
/* This is atomic written page, keep Private */
if (page_private_atomic(&folio->page))
return false;
sbi = F2FS_M_SB(folio->mapping);
if (test_opt(sbi, COMPRESS_CACHE)) {
struct inode *inode = folio->mapping->host;
......@@ -3565,18 +3678,6 @@ static bool f2fs_dirty_data_folio(struct address_space *mapping,
folio_mark_uptodate(folio);
BUG_ON(folio_test_swapcache(folio));
if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
if (!page_private_atomic(&folio->page)) {
f2fs_register_inmem_page(inode, &folio->page);
return true;
}
/*
* Previously, this page has been registered, we just
* return here.
*/
return false;
}
if (!folio_test_dirty(folio)) {
filemap_dirty_folio(mapping, folio);
f2fs_update_dirty_folio(inode, folio);
......@@ -3656,42 +3757,14 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
int f2fs_migrate_page(struct address_space *mapping,
struct page *newpage, struct page *page, enum migrate_mode mode)
{
int rc, extra_count;
struct f2fs_inode_info *fi = F2FS_I(mapping->host);
bool atomic_written = page_private_atomic(page);
int rc, extra_count = 0;
BUG_ON(PageWriteback(page));
/* migrating an atomic written page is safe with the inmem_lock hold */
if (atomic_written) {
if (mode != MIGRATE_SYNC)
return -EBUSY;
if (!mutex_trylock(&fi->inmem_lock))
return -EAGAIN;
}
/* one extra reference was held for atomic_write page */
extra_count = atomic_written ? 1 : 0;
rc = migrate_page_move_mapping(mapping, newpage,
page, extra_count);
if (rc != MIGRATEPAGE_SUCCESS) {
if (atomic_written)
mutex_unlock(&fi->inmem_lock);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
}
if (atomic_written) {
struct inmem_pages *cur;
list_for_each_entry(cur, &fi->inmem_pages, list)
if (cur->page == page) {
cur->page = newpage;
break;
}
mutex_unlock(&fi->inmem_lock);
put_page(page);
get_page(newpage);
}
/* guarantee to start from no stale private field */
set_page_private(newpage, 0);
......
......@@ -91,11 +91,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
si->nquota_files = sbi->nquota_files;
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->aw_cnt = sbi->atomic_files;
si->vw_cnt = atomic_read(&sbi->vw_cnt);
si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt);
si->nr_dio_read = get_pages(sbi, F2FS_DIO_READ);
si->nr_dio_write = get_pages(sbi, F2FS_DIO_WRITE);
si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
......@@ -167,8 +164,6 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
si->io_skip_bggc = sbi->io_skip_bggc;
si->other_skip_bggc = sbi->other_skip_bggc;
si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC];
si->skipped_atomic_files[FG_GC] = sbi->skipped_atomic_files[FG_GC];
si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
/ 2;
......@@ -296,7 +291,6 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
sizeof(struct nat_entry);
si->cache_mem += NM_I(sbi)->nat_cnt[DIRTY_NAT] *
sizeof(struct nat_entry_set);
si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
for (i = 0; i < MAX_INO_ENTRY; i++)
si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
si->cache_mem += atomic_read(&sbi->total_ext_tree) *
......@@ -491,10 +485,6 @@ static int stat_show(struct seq_file *s, void *v)
si->bg_data_blks);
seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks,
si->bg_node_blks);
seq_printf(s, "Skipped : atomic write %llu (%llu)\n",
si->skipped_atomic_files[BG_GC] +
si->skipped_atomic_files[FG_GC],
si->skipped_atomic_files[BG_GC]);
seq_printf(s, "BG skip : IO: %u, Other: %u\n",
si->io_skip_bggc, si->other_skip_bggc);
seq_puts(s, "\nExtent Cache:\n");
......@@ -519,10 +509,8 @@ static int stat_show(struct seq_file *s, void *v)
si->flush_list_empty,
si->nr_discarding, si->nr_discarded,
si->nr_discard_cmd, si->undiscard_blks);
seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), "
"volatile IO: %4d (Max. %4d)\n",
si->inmem_pages, si->aw_cnt, si->max_aw_cnt,
si->vw_cnt, si->max_vw_cnt);
seq_printf(s, " - atomic IO: %4d (Max. %4d)\n",
si->aw_cnt, si->max_aw_cnt);
seq_printf(s, " - compress: %4d, hit:%8d\n", si->compress_pages, si->compress_page_hit);
seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
......@@ -623,9 +611,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
for (i = META_CP; i < META_MAX; i++)
atomic_set(&sbi->meta_count[i], 0);
atomic_set(&sbi->vw_cnt, 0);
atomic_set(&sbi->max_aw_cnt, 0);
atomic_set(&sbi->max_vw_cnt, 0);
raw_spin_lock_irqsave(&f2fs_stat_lock, flags);
list_add_tail(&si->stat_list, &f2fs_stat_list);
......
......@@ -82,7 +82,8 @@ int f2fs_init_casefolded_name(const struct inode *dir,
#if IS_ENABLED(CONFIG_UNICODE)
struct super_block *sb = dir->i_sb;
if (IS_CASEFOLDED(dir)) {
if (IS_CASEFOLDED(dir) &&
!is_dot_dotdot(fname->usr_fname->name, fname->usr_fname->len)) {
fname->cf_name.name = f2fs_kmem_cache_alloc(f2fs_cf_name_slab,
GFP_NOFS, false, F2FS_SB(sb));
if (!fname->cf_name.name)
......
This diff is collapsed.
This diff is collapsed.
......@@ -35,6 +35,10 @@ static int gc_thread_func(void *data)
wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
wait_queue_head_t *fggc_wq = &sbi->gc_thread->fggc_wq;
unsigned int wait_ms;
struct f2fs_gc_control gc_control = {
.victim_segno = NULL_SEGNO,
.should_migrate_blocks = false,
.err_gc_skipped = false };
wait_ms = gc_th->min_sleep_time;
......@@ -141,8 +145,12 @@ static int gc_thread_func(void *data)
if (foreground)
sync_mode = false;
gc_control.init_gc_type = sync_mode ? FG_GC : BG_GC;
gc_control.no_bg_gc = foreground;
gc_control.nr_free_secs = foreground ? 1 : 0;
/* if return value is not zero, no victim was selected */
if (f2fs_gc(sbi, sync_mode, !foreground, false, NULL_SEGNO))
if (f2fs_gc(sbi, &gc_control))
wait_ms = gc_th->no_gc_sleep_time;
if (foreground)
......@@ -646,6 +654,54 @@ static void release_victim_entry(struct f2fs_sb_info *sbi)
f2fs_bug_on(sbi, !list_empty(&am->victim_list));
}
static bool f2fs_pin_section(struct f2fs_sb_info *sbi, unsigned int segno)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
if (!dirty_i->enable_pin_section)
return false;
if (!test_and_set_bit(secno, dirty_i->pinned_secmap))
dirty_i->pinned_secmap_cnt++;
return true;
}
static bool f2fs_pinned_section_exists(struct dirty_seglist_info *dirty_i)
{
return dirty_i->pinned_secmap_cnt;
}
static bool f2fs_section_is_pinned(struct dirty_seglist_info *dirty_i,
unsigned int secno)
{
return dirty_i->enable_pin_section &&
f2fs_pinned_section_exists(dirty_i) &&
test_bit(secno, dirty_i->pinned_secmap);
}
static void f2fs_unpin_all_sections(struct f2fs_sb_info *sbi, bool enable)
{
unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
if (f2fs_pinned_section_exists(DIRTY_I(sbi))) {
memset(DIRTY_I(sbi)->pinned_secmap, 0, bitmap_size);
DIRTY_I(sbi)->pinned_secmap_cnt = 0;
}
DIRTY_I(sbi)->enable_pin_section = enable;
}
static int f2fs_gc_pinned_control(struct inode *inode, int gc_type,
unsigned int segno)
{
if (!f2fs_is_pinned_file(inode))
return 0;
if (gc_type != FG_GC)
return -EBUSY;
if (!f2fs_pin_section(F2FS_I_SB(inode), segno))
f2fs_pin_file_control(inode, true);
return -EAGAIN;
}
/*
* This function is called from two paths.
* One is garbage collection and the other is SSR segment selection.
......@@ -787,6 +843,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
goto next;
if (gc_type == FG_GC && f2fs_section_is_pinned(dirty_i, secno))
goto next;
if (is_atgc) {
add_victim_entry(sbi, &p, segno);
goto next;
......@@ -1194,18 +1253,9 @@ static int move_data_block(struct inode *inode, block_t bidx,
goto out;
}
if (f2fs_is_atomic_file(inode)) {
F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
err = -EAGAIN;
goto out;
}
if (f2fs_is_pinned_file(inode)) {
f2fs_pin_file_control(inode, true);
err = -EAGAIN;
err = f2fs_gc_pinned_control(inode, gc_type, segno);
if (err)
goto out;
}
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = f2fs_get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
......@@ -1344,18 +1394,9 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type,
goto out;
}
if (f2fs_is_atomic_file(inode)) {
F2FS_I(inode)->i_gc_failures[GC_FAILURE_ATOMIC]++;
F2FS_I_SB(inode)->skipped_atomic_files[gc_type]++;
err = -EAGAIN;
goto out;
}
if (f2fs_is_pinned_file(inode)) {
if (gc_type == FG_GC)
f2fs_pin_file_control(inode, true);
err = -EAGAIN;
err = f2fs_gc_pinned_control(inode, gc_type, segno);
if (err)
goto out;
}
if (gc_type == BG_GC) {
if (PageWriteback(page)) {
......@@ -1475,11 +1516,19 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
ofs_in_node = le16_to_cpu(entry->ofs_in_node);
if (phase == 3) {
int err;
inode = f2fs_iget(sb, dni.ino);
if (IS_ERR(inode) || is_bad_inode(inode) ||
special_file(inode->i_mode))
continue;
err = f2fs_gc_pinned_control(inode, gc_type, segno);
if (err == -EAGAIN) {
iput(inode);
return submitted;
}
if (!f2fs_down_write_trylock(
&F2FS_I(inode)->i_gc_rwsem[WRITE])) {
iput(inode);
......@@ -1699,23 +1748,21 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
return seg_freed;
}
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
bool background, bool force, unsigned int segno)
int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control)
{
int gc_type = sync ? FG_GC : BG_GC;
int gc_type = gc_control->init_gc_type;
unsigned int segno = gc_control->victim_segno;
int sec_freed = 0, seg_freed = 0, total_freed = 0;
int ret = 0;
struct cp_control cpc;
unsigned int init_segno = segno;
struct gc_inode_list gc_list = {
.ilist = LIST_HEAD_INIT(gc_list.ilist),
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
unsigned long long last_skipped = sbi->skipped_atomic_files[FG_GC];
unsigned long long first_skipped;
unsigned int skipped_round = 0, round = 0;
trace_f2fs_gc_begin(sbi->sb, sync, background,
trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
gc_control->nr_free_secs,
get_pages(sbi, F2FS_DIRTY_NODES),
get_pages(sbi, F2FS_DIRTY_DENTS),
get_pages(sbi, F2FS_DIRTY_IMETA),
......@@ -1726,7 +1773,6 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
cpc.reason = __get_cp_reason(sbi);
sbi->skipped_gc_rwsem = 0;
first_skipped = last_skipped;
gc_more:
if (unlikely(!(sbi->sb->s_flags & SB_ACTIVE))) {
ret = -EINVAL;
......@@ -1743,8 +1789,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
* threshold, we can make them free by checkpoint. Then, we
* secure free segments which doesn't need fggc any more.
*/
if (prefree_segments(sbi) &&
!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
if (prefree_segments(sbi)) {
ret = f2fs_write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
......@@ -1754,54 +1799,69 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
}
/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
if (gc_type == BG_GC && !background) {
if (gc_type == BG_GC && gc_control->no_bg_gc) {
ret = -EINVAL;
goto stop;
}
retry:
ret = __get_victim(sbi, &segno, gc_type);
if (ret)
if (ret) {
/* allow to search victim from sections has pinned data */
if (ret == -ENODATA && gc_type == FG_GC &&
f2fs_pinned_section_exists(DIRTY_I(sbi))) {
f2fs_unpin_all_sections(sbi, false);
goto retry;
}
goto stop;
}
seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type, force);
if (gc_type == FG_GC &&
seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
sec_freed++;
seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type,
gc_control->should_migrate_blocks);
total_freed += seg_freed;
if (gc_type == FG_GC) {
if (sbi->skipped_atomic_files[FG_GC] > last_skipped ||
sbi->skipped_gc_rwsem)
skipped_round++;
last_skipped = sbi->skipped_atomic_files[FG_GC];
round++;
}
if (seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
sec_freed++;
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;
if (sync)
if (gc_control->init_gc_type == FG_GC ||
!has_not_enough_free_secs(sbi,
(gc_type == FG_GC) ? sec_freed : 0, 0)) {
if (gc_type == FG_GC && sec_freed < gc_control->nr_free_secs)
goto go_gc_more;
goto stop;
if (has_not_enough_free_secs(sbi, sec_freed, 0)) {
if (skipped_round <= MAX_SKIP_GC_COUNT ||
skipped_round * 2 < round) {
segno = NULL_SEGNO;
goto gc_more;
}
if (first_skipped < last_skipped &&
(last_skipped - first_skipped) >
sbi->skipped_gc_rwsem) {
f2fs_drop_inmem_pages_all(sbi, true);
segno = NULL_SEGNO;
goto gc_more;
/* FG_GC stops GC by skip_count */
if (gc_type == FG_GC) {
if (sbi->skipped_gc_rwsem)
skipped_round++;
round++;
if (skipped_round > MAX_SKIP_GC_COUNT &&
skipped_round * 2 >= round) {
ret = f2fs_write_checkpoint(sbi, &cpc);
goto stop;
}
}
if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))
/* Write checkpoint to reclaim prefree segments */
if (free_sections(sbi) < NR_CURSEG_PERSIST_TYPE &&
prefree_segments(sbi)) {
ret = f2fs_write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
}
go_gc_more:
segno = NULL_SEGNO;
goto gc_more;
stop:
SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0;
SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno;
SIT_I(sbi)->last_victim[FLUSH_DEVICE] = gc_control->victim_segno;
if (gc_type == FG_GC)
f2fs_unpin_all_sections(sbi, true);
trace_f2fs_gc_end(sbi->sb, ret, total_freed, sec_freed,
get_pages(sbi, F2FS_DIRTY_NODES),
......@@ -1816,7 +1876,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
put_gc_inode(&gc_list);
if (sync && !ret)
if (gc_control->err_gc_skipped && !ret)
ret = sec_freed ? 0 : -EAGAIN;
return ret;
}
......
......@@ -91,7 +91,7 @@ static u32 TEA_hash_name(const u8 *p, size_t len)
/*
* Compute @fname->hash. For all directories, @fname->disk_name must be set.
* For casefolded directories, @fname->usr_fname must be set, and also
* @fname->cf_name if the filename is valid Unicode.
* @fname->cf_name if the filename is valid Unicode and is not "." or "..".
*/
void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname)
{
......@@ -110,10 +110,11 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname)
/*
* If the casefolded name is provided, hash it instead of the
* on-disk name. If the casefolded name is *not* provided, that
* should only be because the name wasn't valid Unicode, so fall
* back to treating the name as an opaque byte sequence. Note
* that to handle encrypted directories, the fallback must use
* usr_fname (plaintext) rather than disk_name (ciphertext).
* should only be because the name wasn't valid Unicode or was
* "." or "..", so fall back to treating the name as an opaque
* byte sequence. Note that to handle encrypted directories,
* the fallback must use usr_fname (plaintext) rather than
* disk_name (ciphertext).
*/
WARN_ON_ONCE(!fname->usr_fname->name);
if (fname->cf_name.name) {
......
......@@ -14,21 +14,40 @@
#include "node.h"
#include <trace/events/f2fs.h>
bool f2fs_may_inline_data(struct inode *inode)
static bool support_inline_data(struct inode *inode)
{
if (f2fs_is_atomic_file(inode))
return false;
if (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))
return false;
if (i_size_read(inode) > MAX_INLINE_DATA(inode))
return false;
return true;
}
if (f2fs_post_read_required(inode))
bool f2fs_may_inline_data(struct inode *inode)
{
if (!support_inline_data(inode))
return false;
return !f2fs_post_read_required(inode);
}
bool f2fs_sanity_check_inline_data(struct inode *inode)
{
if (!f2fs_has_inline_data(inode))
return false;
if (!support_inline_data(inode))
return true;
/*
* used by sanity_check_inode(), when disk layout fields has not
* been synchronized to inmem fields.
*/
return (S_ISREG(inode->i_mode) &&
(file_is_encrypt(inode) || file_is_verity(inode) ||
(F2FS_I(inode)->i_flags & F2FS_COMPR_FL)));
}
bool f2fs_may_inline_dentry(struct inode *inode)
......
......@@ -260,8 +260,8 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return false;
}
if (F2FS_I(inode)->extent_tree) {
struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
if (fi->extent_tree) {
struct extent_info *ei = &fi->extent_tree->largest;
if (ei->len &&
(!f2fs_is_valid_blkaddr(sbi, ei->blk,
......@@ -276,8 +276,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
}
}
if (f2fs_has_inline_data(inode) &&
(!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) {
if (f2fs_sanity_check_inline_data(inode)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix",
__func__, inode->i_ino, inode->i_mode);
......@@ -466,10 +465,10 @@ static int do_read_inode(struct inode *inode)
}
}
F2FS_I(inode)->i_disk_time[0] = inode->i_atime;
F2FS_I(inode)->i_disk_time[1] = inode->i_ctime;
F2FS_I(inode)->i_disk_time[2] = inode->i_mtime;
F2FS_I(inode)->i_disk_time[3] = F2FS_I(inode)->i_crtime;
fi->i_disk_time[0] = inode->i_atime;
fi->i_disk_time[1] = inode->i_ctime;
fi->i_disk_time[2] = inode->i_mtime;
fi->i_disk_time[3] = fi->i_crtime;
f2fs_put_page(node_page, 1);
stat_inc_inline_xattr(inode);
......@@ -745,9 +744,8 @@ void f2fs_evict_inode(struct inode *inode)
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
int err = 0;
/* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode))
f2fs_drop_inmem_pages(inode);
f2fs_abort_atomic_write(inode, true);
trace_f2fs_evict_inode(inode);
truncate_inode_pages_final(&inode->i_data);
......@@ -796,8 +794,22 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_lock_op(sbi);
err = f2fs_remove_inode_page(inode);
f2fs_unlock_op(sbi);
if (err == -ENOENT)
if (err == -ENOENT) {
err = 0;
/*
* in fuzzed image, another node may has the same
* block address as inode's, if it was truncated
* previously, truncation of inode node will fail.
*/
if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
f2fs_warn(F2FS_I_SB(inode),
"f2fs_evict_inode: inconsistent node id, ino:%lu",
inode->i_ino);
f2fs_inode_synced(inode);
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
}
}
/* give more chances, if ENOMEM case */
......
......@@ -37,13 +37,10 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns,
if (!inode)
return ERR_PTR(-ENOMEM);
f2fs_lock_op(sbi);
if (!f2fs_alloc_nid(sbi, &ino)) {
f2fs_unlock_op(sbi);
err = -ENOSPC;
goto fail;
}
f2fs_unlock_op(sbi);
nid_free = true;
......@@ -461,6 +458,13 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
return 0;
}
if (!S_ISDIR(dir->i_mode)) {
f2fs_err(sbi, "inconsistent inode status, skip recovering inline_dots inode (ino:%lu, i_mode:%u, pino:%u)",
dir->i_ino, dir->i_mode, pino);
set_sbi_flag(sbi, SBI_NEED_FSCK);
return -ENOTDIR;
}
err = f2fs_dquot_initialize(dir);
if (err)
return err;
......@@ -836,8 +840,8 @@ static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
}
static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
struct dentry *dentry, umode_t mode,
struct inode **whiteout)
struct dentry *dentry, umode_t mode, bool is_whiteout,
struct inode **new_inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct inode *inode;
......@@ -851,7 +855,7 @@ static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
if (IS_ERR(inode))
return PTR_ERR(inode);
if (whiteout) {
if (is_whiteout) {
init_special_inode(inode, inode->i_mode, WHITEOUT_DEV);
inode->i_op = &f2fs_special_inode_operations;
} else {
......@@ -876,21 +880,25 @@ static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
f2fs_add_orphan_inode(inode);
f2fs_alloc_nid_done(sbi, inode->i_ino);
if (whiteout) {
if (is_whiteout) {
f2fs_i_links_write(inode, false);
spin_lock(&inode->i_lock);
inode->i_state |= I_LINKABLE;
spin_unlock(&inode->i_lock);
*whiteout = inode;
} else {
if (dentry)
d_tmpfile(dentry, inode);
else
f2fs_i_links_write(inode, false);
}
/* link_count was changed by d_tmpfile as well. */
f2fs_unlock_op(sbi);
unlock_new_inode(inode);
if (new_inode)
*new_inode = inode;
f2fs_balance_fs(sbi, true);
return 0;
......@@ -911,7 +919,7 @@ static int f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
if (!f2fs_is_checkpoint_ready(sbi))
return -ENOSPC;
return __f2fs_tmpfile(mnt_userns, dir, dentry, mode, NULL);
return __f2fs_tmpfile(mnt_userns, dir, dentry, mode, false, NULL);
}
static int f2fs_create_whiteout(struct user_namespace *mnt_userns,
......@@ -921,7 +929,13 @@ static int f2fs_create_whiteout(struct user_namespace *mnt_userns,
return -EIO;
return __f2fs_tmpfile(mnt_userns, dir, NULL,
S_IFCHR | WHITEOUT_MODE, whiteout);
S_IFCHR | WHITEOUT_MODE, true, whiteout);
}
int f2fs_get_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
struct inode **new_inode)
{
return __f2fs_tmpfile(mnt_userns, dir, NULL, S_IFREG, false, new_inode);
}
static int f2fs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
......
......@@ -90,10 +90,6 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
atomic_read(&sbi->total_ext_node) *
sizeof(struct extent_node)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else if (type == INMEM_PAGES) {
/* it allows 20% / total_ram for inmemory pages */
mem_size = get_pages(sbi, F2FS_INMEM_PAGES);
res = mem_size < (val.totalram / 5);
} else if (type == DISCARD_CACHE) {
mem_size = (atomic_read(&dcc->discard_cmd_cnt) *
sizeof(struct discard_cmd)) >> PAGE_SHIFT;
......@@ -1416,8 +1412,7 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
err = read_node_page(page, 0);
if (err < 0) {
f2fs_put_page(page, 1);
return ERR_PTR(err);
goto out_put_err;
} else if (err == LOCKED_PAGE) {
err = 0;
goto page_hit;
......@@ -1443,7 +1438,9 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
goto out_err;
}
page_hit:
if (unlikely(nid != nid_of_node(page))) {
if (likely(nid == nid_of_node(page)))
return page;
f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
nid, nid_of_node(page), ino_of_node(page),
ofs_of_node(page), cpver_of_node(page),
......@@ -1452,10 +1449,10 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
err = -EINVAL;
out_err:
ClearPageUptodate(page);
out_put_err:
f2fs_handle_page_eio(sbi, page->index, NODE);
f2fs_put_page(page, 1);
return ERR_PTR(err);
}
return page;
}
struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
......@@ -1631,7 +1628,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
goto redirty_out;
}
if (atomic && !test_opt(sbi, NOBARRIER))
if (atomic && !test_opt(sbi, NOBARRIER) && !f2fs_sb_has_blkzoned(sbi))
fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
/* should add to global list before clearing PAGECACHE status */
......
......@@ -147,7 +147,6 @@ enum mem_type {
DIRTY_DENTS, /* indicates dirty dentry pages */
INO_ENTRIES, /* indicates inode entries */
EXTENT_CACHE, /* indicates extent cache */
INMEM_PAGES, /* indicates inmemory pages */
DISCARD_CACHE, /* indicates memory of cached discard cmds */
COMPRESS_PAGE, /* indicates memory of cached compressed pages */
BASE_CHECK, /* check kernel status */
......
This diff is collapsed.
......@@ -24,6 +24,7 @@
#define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
#define SE_PAGETYPE(se) ((IS_NODESEG((se)->type) ? NODE : DATA))
static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
unsigned short seg_type)
......@@ -224,10 +225,10 @@ struct segment_allocation {
#define MAX_SKIP_GC_COUNT 16
struct inmem_pages {
struct revoke_entry {
struct list_head list;
struct page *page;
block_t old_addr; /* for revoking when fail to commit */
pgoff_t index;
};
struct sit_info {
......@@ -294,6 +295,9 @@ struct dirty_seglist_info {
struct mutex seglist_lock; /* lock for segment bitmaps */
int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */
unsigned long *victim_secmap; /* background GC victims */
unsigned long *pinned_secmap; /* pinned victims from foreground GC */
unsigned int pinned_secmap_cnt; /* count of victims which has pinned data */
bool enable_pin_section; /* enable pinning section */
};
/* victim selection function for cleaning and SSR */
......@@ -572,11 +576,10 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi)
return GET_SEC_FROM_SEG(sbi, reserved_segments(sbi));
}
static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi)
static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
unsigned int node_blocks, unsigned int dent_blocks)
{
unsigned int node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) +
get_pages(sbi, F2FS_DIRTY_DENTS);
unsigned int dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
unsigned int segno, left_blocks;
int i;
......@@ -602,19 +605,28 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi)
static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
int freed, int needed)
{
int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
unsigned int total_node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) +
get_pages(sbi, F2FS_DIRTY_DENTS) +
get_pages(sbi, F2FS_DIRTY_IMETA);
unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
unsigned int node_secs = total_node_blocks / BLKS_PER_SEC(sbi);
unsigned int dent_secs = total_dent_blocks / BLKS_PER_SEC(sbi);
unsigned int node_blocks = total_node_blocks % BLKS_PER_SEC(sbi);
unsigned int dent_blocks = total_dent_blocks % BLKS_PER_SEC(sbi);
unsigned int free, need_lower, need_upper;
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
return false;
if (free_sections(sbi) + freed == reserved_sections(sbi) + needed &&
has_curseg_enough_space(sbi))
free = free_sections(sbi) + freed;
need_lower = node_secs + dent_secs + reserved_sections(sbi) + needed;
need_upper = need_lower + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0);
if (free > need_upper)
return false;
return (free_sections(sbi) + freed) <=
(node_secs + 2 * dent_secs + imeta_secs +
reserved_sections(sbi) + needed);
else if (free <= need_lower)
return true;
return !has_curseg_enough_space(sbi, node_blocks, dent_blocks);
}
static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
......
......@@ -525,10 +525,11 @@ static int f2fs_set_test_dummy_encryption(struct super_block *sb,
return -EINVAL;
}
f2fs_warn(sbi, "Test dummy encryption mode enabled");
return 0;
#else
f2fs_warn(sbi, "Test dummy encryption mount option ignored");
f2fs_warn(sbi, "test_dummy_encryption option not supported");
return -EINVAL;
#endif
return 0;
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
......@@ -1339,9 +1340,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
spin_lock_init(&fi->i_size_lock);
INIT_LIST_HEAD(&fi->dirty_list);
INIT_LIST_HEAD(&fi->gdirty_list);
INIT_LIST_HEAD(&fi->inmem_ilist);
INIT_LIST_HEAD(&fi->inmem_pages);
mutex_init(&fi->inmem_lock);
init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
init_f2fs_rwsem(&fi->i_xattr_sem);
......@@ -1382,9 +1380,8 @@ static int f2fs_drop_inode(struct inode *inode)
atomic_inc(&inode->i_count);
spin_unlock(&inode->i_lock);
/* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode))
f2fs_drop_inmem_pages(inode);
f2fs_abort_atomic_write(inode, true);
/* should remain fi->extent_tree for writepage */
f2fs_destroy_extent_node(inode);
......@@ -1707,18 +1704,23 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
block_t total_count, user_block_count, start_count;
u64 avail_node_count;
unsigned int total_valid_node_count;
total_count = le64_to_cpu(sbi->raw_super->block_count);
user_block_count = sbi->user_block_count;
start_count = le32_to_cpu(sbi->raw_super->segment0_blkaddr);
buf->f_type = F2FS_SUPER_MAGIC;
buf->f_bsize = sbi->blocksize;
buf->f_blocks = total_count - start_count;
spin_lock(&sbi->stat_lock);
user_block_count = sbi->user_block_count;
total_valid_node_count = valid_node_count(sbi);
avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
sbi->current_reserved_blocks;
spin_lock(&sbi->stat_lock);
if (unlikely(buf->f_bfree <= sbi->unusable_block_count))
buf->f_bfree = 0;
else
......@@ -1731,14 +1733,12 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
else
buf->f_bavail = 0;
avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM;
if (avail_node_count > user_block_count) {
buf->f_files = user_block_count;
buf->f_ffree = buf->f_bavail;
} else {
buf->f_files = avail_node_count;
buf->f_ffree = min(avail_node_count - valid_node_count(sbi),
buf->f_ffree = min(avail_node_count - total_valid_node_count,
buf->f_bavail);
}
......@@ -2055,7 +2055,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
{
unsigned int s_flags = sbi->sb->s_flags;
struct cp_control cpc;
unsigned int gc_mode;
unsigned int gc_mode = sbi->gc_mode;
int err = 0;
int ret;
block_t unusable;
......@@ -2066,14 +2066,25 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
}
sbi->sb->s_flags |= SB_ACTIVE;
/* check if we need more GC first */
unusable = f2fs_get_unusable_blocks(sbi);
if (!f2fs_disable_cp_again(sbi, unusable))
goto skip_gc;
f2fs_update_time(sbi, DISABLE_TIME);
gc_mode = sbi->gc_mode;
sbi->gc_mode = GC_URGENT_HIGH;
while (!f2fs_time_over(sbi, DISABLE_TIME)) {
struct f2fs_gc_control gc_control = {
.victim_segno = NULL_SEGNO,
.init_gc_type = FG_GC,
.should_migrate_blocks = false,
.err_gc_skipped = true,
.nr_free_secs = 1 };
f2fs_down_write(&sbi->gc_lock);
err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
err = f2fs_gc(sbi, &gc_control);
if (err == -ENODATA) {
err = 0;
break;
......@@ -2094,6 +2105,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
goto restore_flag;
}
skip_gc:
f2fs_down_write(&sbi->gc_lock);
cpc.reason = CP_PAUSE;
set_sbi_flag(sbi, SBI_CP_DISABLED);
......@@ -2684,6 +2696,7 @@ int f2fs_quota_sync(struct super_block *sb, int type)
if (!sb_has_quota_active(sb, cnt))
continue;
if (!f2fs_sb_has_quota_ino(sbi))
inode_lock(dqopt->files[cnt]);
/*
......@@ -2703,6 +2716,7 @@ int f2fs_quota_sync(struct super_block *sb, int type)
f2fs_up_read(&sbi->quota_sem);
f2fs_unlock_op(sbi);
if (!f2fs_sb_has_quota_ino(sbi))
inode_unlock(dqopt->files[cnt]);
if (ret)
......@@ -3648,22 +3662,29 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
struct block_device *bdev = FDEV(devi).bdev;
sector_t nr_sectors = bdev_nr_sectors(bdev);
struct f2fs_report_zones_args rep_zone_arg;
u64 zone_sectors;
int ret;
if (!f2fs_sb_has_blkzoned(sbi))
return 0;
zone_sectors = bdev_zone_sectors(bdev);
if (!is_power_of_2(zone_sectors)) {
f2fs_err(sbi, "F2FS does not support non power of 2 zone sizes\n");
return -EINVAL;
}
if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
SECTOR_TO_BLOCK(bdev_zone_sectors(bdev)))
SECTOR_TO_BLOCK(zone_sectors))
return -EINVAL;
sbi->blocks_per_blkz = SECTOR_TO_BLOCK(bdev_zone_sectors(bdev));
sbi->blocks_per_blkz = SECTOR_TO_BLOCK(zone_sectors);
if (sbi->log_blocks_per_blkz && sbi->log_blocks_per_blkz !=
__ilog2_u32(sbi->blocks_per_blkz))
return -EINVAL;
sbi->log_blocks_per_blkz = __ilog2_u32(sbi->blocks_per_blkz);
FDEV(devi).nr_blkz = SECTOR_TO_BLOCK(nr_sectors) >>
sbi->log_blocks_per_blkz;
if (nr_sectors & (bdev_zone_sectors(bdev) - 1))
if (nr_sectors & (zone_sectors - 1))
FDEV(devi).nr_blkz++;
FDEV(devi).blkz_seq = f2fs_kvzalloc(sbi,
......@@ -4070,30 +4091,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
set_sbi_flag(sbi, SBI_POR_DOING);
spin_lock_init(&sbi->stat_lock);
for (i = 0; i < NR_PAGE_TYPE; i++) {
int n = (i == META) ? 1 : NR_TEMP_TYPE;
int j;
sbi->write_io[i] =
f2fs_kmalloc(sbi,
array_size(n,
sizeof(struct f2fs_bio_info)),
GFP_KERNEL);
if (!sbi->write_io[i]) {
err = -ENOMEM;
err = f2fs_init_write_merge_io(sbi);
if (err)
goto free_bio_info;
}
for (j = HOT; j < n; j++) {
init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem);
sbi->write_io[i][j].sbi = sbi;
sbi->write_io[i][j].bio = NULL;
spin_lock_init(&sbi->write_io[i][j].io_lock);
INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
}
}
init_f2fs_rwsem(&sbi->cp_rwsem);
init_f2fs_rwsem(&sbi->quota_sem);
......
......@@ -129,7 +129,7 @@ static int f2fs_begin_enable_verity(struct file *filp)
if (f2fs_verity_in_progress(inode))
return -EBUSY;
if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
if (f2fs_is_atomic_file(inode))
return -EOPNOTSUPP;
/*
......
......@@ -15,10 +15,6 @@ TRACE_DEFINE_ENUM(NODE);
TRACE_DEFINE_ENUM(DATA);
TRACE_DEFINE_ENUM(META);
TRACE_DEFINE_ENUM(META_FLUSH);
TRACE_DEFINE_ENUM(INMEM);
TRACE_DEFINE_ENUM(INMEM_DROP);
TRACE_DEFINE_ENUM(INMEM_INVALIDATE);
TRACE_DEFINE_ENUM(INMEM_REVOKE);
TRACE_DEFINE_ENUM(IPU);
TRACE_DEFINE_ENUM(OPU);
TRACE_DEFINE_ENUM(HOT);
......@@ -59,10 +55,6 @@ TRACE_DEFINE_ENUM(CP_RESIZE);
{ DATA, "DATA" }, \
{ META, "META" }, \
{ META_FLUSH, "META_FLUSH" }, \
{ INMEM, "INMEM" }, \
{ INMEM_DROP, "INMEM_DROP" }, \
{ INMEM_INVALIDATE, "INMEM_INVALIDATE" }, \
{ INMEM_REVOKE, "INMEM_REVOKE" }, \
{ IPU, "IN-PLACE" }, \
{ OPU, "OUT-OF-PLACE" })
......@@ -652,19 +644,22 @@ TRACE_EVENT(f2fs_background_gc,
TRACE_EVENT(f2fs_gc_begin,
TP_PROTO(struct super_block *sb, bool sync, bool background,
TP_PROTO(struct super_block *sb, int gc_type, bool no_bg_gc,
unsigned int nr_free_secs,
long long dirty_nodes, long long dirty_dents,
long long dirty_imeta, unsigned int free_sec,
unsigned int free_seg, int reserved_seg,
unsigned int prefree_seg),
TP_ARGS(sb, sync, background, dirty_nodes, dirty_dents, dirty_imeta,
TP_ARGS(sb, gc_type, no_bg_gc, nr_free_secs, dirty_nodes,
dirty_dents, dirty_imeta,
free_sec, free_seg, reserved_seg, prefree_seg),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(bool, sync)
__field(bool, background)
__field(int, gc_type)
__field(bool, no_bg_gc)
__field(unsigned int, nr_free_secs)
__field(long long, dirty_nodes)
__field(long long, dirty_dents)
__field(long long, dirty_imeta)
......@@ -676,8 +671,9 @@ TRACE_EVENT(f2fs_gc_begin,
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->sync = sync;
__entry->background = background;
__entry->gc_type = gc_type;
__entry->no_bg_gc = no_bg_gc;
__entry->nr_free_secs = nr_free_secs;
__entry->dirty_nodes = dirty_nodes;
__entry->dirty_dents = dirty_dents;
__entry->dirty_imeta = dirty_imeta;
......@@ -687,12 +683,13 @@ TRACE_EVENT(f2fs_gc_begin,
__entry->prefree_seg = prefree_seg;
),
TP_printk("dev = (%d,%d), sync = %d, background = %d, nodes = %lld, "
"dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
TP_printk("dev = (%d,%d), gc_type = %s, no_background_GC = %d, nr_free_secs = %u, "
"nodes = %lld, dents = %lld, imeta = %lld, free_sec:%u, free_seg:%u, "
"rsv_seg:%d, prefree_seg:%u",
show_dev(__entry->dev),
__entry->sync,
__entry->background,
show_gc_type(__entry->gc_type),
(__entry->gc_type == BG_GC) ? __entry->no_bg_gc : -1,
__entry->nr_free_secs,
__entry->dirty_nodes,
__entry->dirty_dents,
__entry->dirty_imeta,
......@@ -1285,20 +1282,6 @@ DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite,
TP_ARGS(page, type)
);
DEFINE_EVENT(f2fs__page, f2fs_register_inmem_page,
TP_PROTO(struct page *page, int type),
TP_ARGS(page, type)
);
DEFINE_EVENT(f2fs__page, f2fs_commit_inmem_page,
TP_PROTO(struct page *page, int type),
TP_ARGS(page, type)
);
TRACE_EVENT(f2fs_filemap_fault,
TP_PROTO(struct inode *inode, pgoff_t index, unsigned long ret),
......@@ -2063,6 +2046,100 @@ TRACE_EVENT(f2fs_fiemap,
__entry->ret)
);
DECLARE_EVENT_CLASS(f2fs__rw_start,
TP_PROTO(struct inode *inode, loff_t offset, int bytes,
pid_t pid, char *pathname, char *command),
TP_ARGS(inode, offset, bytes, pid, pathname, command),
TP_STRUCT__entry(
__string(pathbuf, pathname)
__field(loff_t, offset)
__field(int, bytes)
__field(loff_t, i_size)
__string(cmdline, command)
__field(pid_t, pid)
__field(ino_t, ino)
),
TP_fast_assign(
/*
* Replace the spaces in filenames and cmdlines
* because this screws up the tooling that parses
* the traces.
*/
__assign_str(pathbuf, pathname);
(void)strreplace(__get_str(pathbuf), ' ', '_');
__entry->offset = offset;
__entry->bytes = bytes;
__entry->i_size = i_size_read(inode);
__assign_str(cmdline, command);
(void)strreplace(__get_str(cmdline), ' ', '_');
__entry->pid = pid;
__entry->ino = inode->i_ino;
),
TP_printk("entry_name %s, offset %llu, bytes %d, cmdline %s,"
" pid %d, i_size %llu, ino %lu",
__get_str(pathbuf), __entry->offset, __entry->bytes,
__get_str(cmdline), __entry->pid, __entry->i_size,
(unsigned long) __entry->ino)
);
DECLARE_EVENT_CLASS(f2fs__rw_end,
TP_PROTO(struct inode *inode, loff_t offset, int bytes),
TP_ARGS(inode, offset, bytes),
TP_STRUCT__entry(
__field(ino_t, ino)
__field(loff_t, offset)
__field(int, bytes)
),
TP_fast_assign(
__entry->ino = inode->i_ino;
__entry->offset = offset;
__entry->bytes = bytes;
),
TP_printk("ino %lu, offset %llu, bytes %d",
(unsigned long) __entry->ino,
__entry->offset, __entry->bytes)
);
DEFINE_EVENT(f2fs__rw_start, f2fs_dataread_start,
TP_PROTO(struct inode *inode, loff_t offset, int bytes,
pid_t pid, char *pathname, char *command),
TP_ARGS(inode, offset, bytes, pid, pathname, command)
);
DEFINE_EVENT(f2fs__rw_end, f2fs_dataread_end,
TP_PROTO(struct inode *inode, loff_t offset, int bytes),
TP_ARGS(inode, offset, bytes)
);
DEFINE_EVENT(f2fs__rw_start, f2fs_datawrite_start,
TP_PROTO(struct inode *inode, loff_t offset, int bytes,
pid_t pid, char *pathname, char *command),
TP_ARGS(inode, offset, bytes, pid, pathname, command)
);
DEFINE_EVENT(f2fs__rw_end, f2fs_datawrite_end,
TP_PROTO(struct inode *inode, loff_t offset, int bytes),
TP_ARGS(inode, offset, bytes)
);
#endif /* _TRACE_F2FS_H */
/* This part must be outside protection */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment