Commit f9a03ae1 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-f2fs-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "This series adds two ioctls to control cached data and fragmented
  files.  Most of the rest fixes missing error cases and bugs that we
  have not covered so far.  Summary:

  Enhancements:
   - support an ioctl to execute online file defragmentation
   - support an ioctl to flush cached data
   - speed up shrinking of extent_cache entries
   - handle broken superblock
   - refector dirty inode management infra
   - revisit f2fs_map_blocks to handle more cases
   - reduce global lock coverage
   - add detecting user's idle time

  Major bug fixes:
   - fix data race condition on cached nat entries
   - fix error cases of volatile and atomic writes"

* tag 'for-f2fs-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (87 commits)
  f2fs: should unset atomic flag after successful commit
  f2fs: fix wrong memory condition check
  f2fs: monitor the number of background checkpoint
  f2fs: detect idle time depending on user behavior
  f2fs: introduce time and interval facility
  f2fs: skip releasing nodes in chindless extent tree
  f2fs: use atomic type for node count in extent tree
  f2fs: recognize encrypted data in f2fs_fiemap
  f2fs: clean up f2fs_balance_fs
  f2fs: remove redundant calls
  f2fs: avoid unnecessary f2fs_balance_fs calls
  f2fs: check the page status filled from disk
  f2fs: introduce __get_node_page to reuse common code
  f2fs: check node id earily when readaheading node page
  f2fs: read isize while holding i_mutex in fiemap
  Revert "f2fs: check the node block address of newly allocated nid"
  f2fs: cover more area with nat_tree_lock
  f2fs: introduce max_file_blocks in sbi
  f2fs crypto: check CONFIG_F2FS_FS_XATTR for encrypted symlink
  f2fs: introduce zombie list for fast shrinking extent trees
  ...
parents 1289ace5 447135a8
......@@ -87,6 +87,12 @@ Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Controls the checkpoint timing.
What: /sys/fs/f2fs/<disk>/idle_interval
Date: January 2016
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Controls the idle timing.
What: /sys/fs/f2fs/<disk>/ra_nid_pages
Date: October 2015
Contact: "Chao Yu" <chao2.yu@samsung.com>
......
......@@ -102,7 +102,7 @@ background_gc=%s Turn on/off cleaning operations, namely garbage
collection, triggered in background when I/O subsystem is
idle. If background_gc=on, it will turn on the garbage
collection and if background_gc=off, garbage collection
will be truned off. If background_gc=sync, it will turn
will be turned off. If background_gc=sync, it will turn
on synchronous garbage collection running in background.
Default value for this option is on. So garbage
collection is on by default.
......@@ -145,10 +145,12 @@ extent_cache Enable an extent cache based on rb-tree, it can cache
as many as extent which map between contiguous logical
address and physical address per inode, resulting in
increasing the cache hit ratio. Set by default.
noextent_cache Diable an extent cache based on rb-tree explicitly, see
noextent_cache Disable an extent cache based on rb-tree explicitly, see
the above extent_cache mount option.
noinline_data Disable the inline data feature, inline data feature is
enabled by default.
data_flush Enable data flushing before checkpoint in order to
persist data of regular and symlink.
================================================================================
DEBUGFS ENTRIES
......@@ -192,7 +194,7 @@ Files in /sys/fs/f2fs/<devname>
policy for garbage collection. Setting gc_idle = 0
(default) will disable this option. Setting
gc_idle = 1 will select the Cost Benefit approach
& setting gc_idle = 2 will select the greedy aproach.
& setting gc_idle = 2 will select the greedy approach.
reclaim_segments This parameter controls the number of prefree
segments to be reclaimed. If the number of prefree
......@@ -298,7 +300,7 @@ The dump.f2fs shows the information of specific inode and dumps SSA and SIT to
file. Each file is dump_ssa and dump_sit.
The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem.
It shows on-disk inode information reconized by a given inode number, and is
It shows on-disk inode information recognized by a given inode number, and is
able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and
./dump_sit respectively.
......
......@@ -237,7 +237,7 @@ static int f2fs_write_meta_page(struct page *page,
dec_page_count(sbi, F2FS_DIRTY_META);
unlock_page(page);
if (wbc->for_reclaim)
if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
f2fs_submit_merged_bio(sbi, META, WRITE);
return 0;
......@@ -410,13 +410,13 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
spin_unlock(&im->ino_lock);
}
void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
/* add new dirty ino entry into list */
__add_ino_entry(sbi, ino, type);
}
void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
/* remove dirty ino entry from list */
__remove_ino_entry(sbi, ino, type);
......@@ -434,7 +434,7 @@ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
return e ? true : false;
}
void release_dirty_inode(struct f2fs_sb_info *sbi)
void release_ino_entry(struct f2fs_sb_info *sbi)
{
struct ino_entry *e, *tmp;
int i;
......@@ -722,47 +722,48 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
return -EINVAL;
}
static int __add_dirty_inode(struct inode *inode, struct inode_entry *new)
static void __add_dirty_inode(struct inode *inode, enum inode_type type)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
return -EEXIST;
if (is_inode_flag_set(fi, flag))
return;
set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
F2FS_I(inode)->dirty_dir = new;
list_add_tail(&new->list, &sbi->dir_inode_list);
stat_inc_dirty_dir(sbi);
return 0;
set_inode_flag(fi, flag);
list_add_tail(&fi->dirty_list, &sbi->inode_list[type]);
stat_inc_dirty_inode(sbi, type);
}
static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
if (get_dirty_pages(inode) ||
!is_inode_flag_set(F2FS_I(inode), flag))
return;
list_del_init(&fi->dirty_list);
clear_inode_flag(fi, flag);
stat_dec_dirty_inode(F2FS_I_SB(inode), type);
}
void update_dirty_page(struct inode *inode, struct page *page)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct inode_entry *new;
int ret = 0;
enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
!S_ISLNK(inode->i_mode))
return;
if (!S_ISDIR(inode->i_mode)) {
inode_inc_dirty_pages(inode);
goto out;
}
new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
new->inode = inode;
INIT_LIST_HEAD(&new->list);
spin_lock(&sbi->dir_inode_lock);
ret = __add_dirty_inode(inode, new);
spin_lock(&sbi->inode_lock[type]);
__add_dirty_inode(inode, type);
inode_inc_dirty_pages(inode);
spin_unlock(&sbi->dir_inode_lock);
spin_unlock(&sbi->inode_lock[type]);
if (ret)
kmem_cache_free(inode_entry_slab, new);
out:
SetPagePrivate(page);
f2fs_trace_pid(page);
}
......@@ -770,70 +771,60 @@ void update_dirty_page(struct inode *inode, struct page *page)
void add_dirty_dir_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct inode_entry *new =
f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
int ret = 0;
new->inode = inode;
INIT_LIST_HEAD(&new->list);
spin_lock(&sbi->dir_inode_lock);
ret = __add_dirty_inode(inode, new);
spin_unlock(&sbi->dir_inode_lock);
if (ret)
kmem_cache_free(inode_entry_slab, new);
spin_lock(&sbi->inode_lock[DIR_INODE]);
__add_dirty_inode(inode, DIR_INODE);
spin_unlock(&sbi->inode_lock[DIR_INODE]);
}
void remove_dirty_dir_inode(struct inode *inode)
void remove_dirty_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct inode_entry *entry;
if (!S_ISDIR(inode->i_mode))
return;
struct f2fs_inode_info *fi = F2FS_I(inode);
enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
spin_lock(&sbi->dir_inode_lock);
if (get_dirty_pages(inode) ||
!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
spin_unlock(&sbi->dir_inode_lock);
if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
!S_ISLNK(inode->i_mode))
return;
}
entry = F2FS_I(inode)->dirty_dir;
list_del(&entry->list);
F2FS_I(inode)->dirty_dir = NULL;
clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
stat_dec_dirty_dir(sbi);
spin_unlock(&sbi->dir_inode_lock);
kmem_cache_free(inode_entry_slab, entry);
spin_lock(&sbi->inode_lock[type]);
__remove_dirty_inode(inode, type);
spin_unlock(&sbi->inode_lock[type]);
/* Only from the recovery routine */
if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
if (is_inode_flag_set(fi, FI_DELAY_IPUT)) {
clear_inode_flag(fi, FI_DELAY_IPUT);
iput(inode);
}
}
void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
{
struct list_head *head;
struct inode_entry *entry;
struct inode *inode;
struct f2fs_inode_info *fi;
bool is_dir = (type == DIR_INODE);
trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
get_pages(sbi, is_dir ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
retry:
if (unlikely(f2fs_cp_error(sbi)))
return;
return -EIO;
spin_lock(&sbi->dir_inode_lock);
spin_lock(&sbi->inode_lock[type]);
head = &sbi->dir_inode_list;
head = &sbi->inode_list[type];
if (list_empty(head)) {
spin_unlock(&sbi->dir_inode_lock);
return;
spin_unlock(&sbi->inode_lock[type]);
trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
get_pages(sbi, is_dir ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
return 0;
}
entry = list_entry(head->next, struct inode_entry, list);
inode = igrab(entry->inode);
spin_unlock(&sbi->dir_inode_lock);
fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
inode = igrab(&fi->vfs_inode);
spin_unlock(&sbi->inode_lock[type]);
if (inode) {
filemap_fdatawrite(inode->i_mapping);
iput(inode);
......@@ -868,11 +859,9 @@ static int block_operations(struct f2fs_sb_info *sbi)
/* write all the dirty dentry pages */
if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
f2fs_unlock_all(sbi);
sync_dirty_dir_inodes(sbi);
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
err = sync_dirty_inodes(sbi, DIR_INODE);
if (err)
goto out;
}
goto retry_flush_dents;
}
......@@ -885,10 +874,9 @@ static int block_operations(struct f2fs_sb_info *sbi)
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
up_write(&sbi->node_write);
sync_node_pages(sbi, 0, &wbc);
if (unlikely(f2fs_cp_error(sbi))) {
err = sync_node_pages(sbi, 0, &wbc);
if (err) {
f2fs_unlock_all(sbi);
err = -EIO;
goto out;
}
goto retry_flush_nodes;
......@@ -919,7 +907,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
finish_wait(&sbi->cp_wait, &wait);
}
static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
......@@ -945,7 +933,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
while (get_pages(sbi, F2FS_DIRTY_META)) {
sync_meta_pages(sbi, META, LONG_MAX);
if (unlikely(f2fs_cp_error(sbi)))
return;
return -EIO;
}
next_free_nid(sbi, &last_nid);
......@@ -1030,7 +1018,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* need to wait for end_io results */
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
return;
return -EIO;
/* write out checkpoint buffer at block 0 */
update_meta_page(sbi, ckpt, start_blk++);
......@@ -1058,7 +1046,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
return;
return -EIO;
filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
......@@ -1081,22 +1069,25 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
discard_blk);
release_dirty_inode(sbi);
release_ino_entry(sbi);
if (unlikely(f2fs_cp_error(sbi)))
return;
return -EIO;
clear_prefree_segments(sbi, cpc);
clear_sbi_flag(sbi, SBI_IS_DIRTY);
return 0;
}
/*
* We guarantee that this checkpoint procedure will not fail.
*/
void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned long long ckpt_ver;
int err = 0;
mutex_lock(&sbi->cp_mutex);
......@@ -1104,14 +1095,19 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
(cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
(cpc->reason == CP_DISCARD && !sbi->discard_blks)))
goto out;
if (unlikely(f2fs_cp_error(sbi)))
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
goto out;
if (f2fs_readonly(sbi->sb))
}
if (f2fs_readonly(sbi->sb)) {
err = -EROFS;
goto out;
}
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");
if (block_operations(sbi))
err = block_operations(sbi);
if (err)
goto out;
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
......@@ -1133,7 +1129,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
flush_sit_entries(sbi, cpc);
/* unlock all the fs_lock[] in do_checkpoint() */
do_checkpoint(sbi, cpc);
err = do_checkpoint(sbi, cpc);
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
......@@ -1143,10 +1139,11 @@ void write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
"checkpoint: version = %llx", ckpt_ver);
/* do checkpoint periodically */
sbi->cp_expires = round_jiffies_up(jiffies + HZ * sbi->cp_interval);
f2fs_update_time(sbi, CP_TIME);
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
mutex_unlock(&sbi->cp_mutex);
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
return err;
}
void init_ino_entry_info(struct f2fs_sb_info *sbi)
......
......@@ -225,7 +225,8 @@ void set_data_blkaddr(struct dnode_of_data *dn)
/* Get physical address of data block */
addr_array = blkaddr_in_node(rn);
addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
set_page_dirty(node_page);
if (set_page_dirty(node_page))
dn->node_changed = true;
}
int reserve_new_block(struct dnode_of_data *dn)
......@@ -412,7 +413,7 @@ struct page *get_new_data_page(struct inode *inode,
struct page *page;
struct dnode_of_data dn;
int err;
repeat:
page = f2fs_grab_cache_page(mapping, index, true);
if (!page) {
/*
......@@ -441,12 +442,11 @@ struct page *get_new_data_page(struct inode *inode,
} else {
f2fs_put_page(page, 1);
page = get_read_data_page(inode, index, READ_SYNC, true);
/* if ipage exists, blkaddr should be NEW_ADDR */
f2fs_bug_on(F2FS_I_SB(inode), ipage);
page = get_lock_data_page(inode, index, true);
if (IS_ERR(page))
goto repeat;
/* wait for read completion */
lock_page(page);
return page;
}
got_it:
if (new_i_size && i_size_read(inode) <
......@@ -494,14 +494,10 @@ static int __allocate_data_block(struct dnode_of_data *dn)
if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT))
i_size_write(dn->inode,
((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT));
/* direct IO doesn't use extent cache to maximize the performance */
f2fs_drop_largest_extent(dn->inode, fofs);
return 0;
}
static void __allocate_data_blocks(struct inode *inode, loff_t offset,
static int __allocate_data_blocks(struct inode *inode, loff_t offset,
size_t count)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
......@@ -510,14 +506,15 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
u64 len = F2FS_BYTES_TO_BLK(count);
bool allocated;
u64 end_offset;
int err = 0;
while (len) {
f2fs_balance_fs(sbi);
f2fs_lock_op(sbi);
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
if (get_dnode_of_data(&dn, start, ALLOC_NODE))
err = get_dnode_of_data(&dn, start, ALLOC_NODE);
if (err)
goto out;
allocated = false;
......@@ -526,12 +523,15 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
while (dn.ofs_in_node < end_offset && len) {
block_t blkaddr;
if (unlikely(f2fs_cp_error(sbi)))
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
goto sync_out;
}
blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
if (__allocate_data_block(&dn))
err = __allocate_data_block(&dn);
if (err)
goto sync_out;
allocated = true;
}
......@@ -545,8 +545,10 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
f2fs_balance_fs(sbi, dn.node_changed);
}
return;
return err;
sync_out:
if (allocated)
......@@ -554,7 +556,8 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
f2fs_put_dnode(&dn);
out:
f2fs_unlock_op(sbi);
return;
f2fs_balance_fs(sbi, dn.node_changed);
return err;
}
/*
......@@ -566,7 +569,7 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
* b. do not use extent cache for better performance
* c. give the block addresses to blockdev
*/
static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int create, int flag)
{
unsigned int maxblocks = map->m_len;
......@@ -577,6 +580,7 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int err = 0, ofs = 1;
struct extent_info ei;
bool allocated = false;
block_t blkaddr;
map->m_len = 0;
map->m_flags = 0;
......@@ -592,7 +596,7 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
}
if (create)
f2fs_lock_op(F2FS_I_SB(inode));
f2fs_lock_op(sbi);
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
......@@ -640,12 +644,21 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
pgofs++;
get_next:
if (map->m_len >= maxblocks)
goto sync_out;
if (dn.ofs_in_node >= end_offset) {
if (allocated)
sync_inode_page(&dn);
allocated = false;
f2fs_put_dnode(&dn);
if (create) {
f2fs_unlock_op(sbi);
f2fs_balance_fs(sbi, dn.node_changed);
f2fs_lock_op(sbi);
}
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, pgofs, mode);
if (err) {
......@@ -657,8 +670,7 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
}
if (maxblocks > map->m_len) {
block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
if (create) {
......@@ -694,15 +706,17 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
map->m_len++;
goto get_next;
}
}
sync_out:
if (allocated)
sync_inode_page(&dn);
put_out:
f2fs_put_dnode(&dn);
unlock_out:
if (create)
f2fs_unlock_op(F2FS_I_SB(inode));
if (create) {
f2fs_unlock_op(sbi);
f2fs_balance_fs(sbi, dn.node_changed);
}
out:
trace_f2fs_map_blocks(inode, map, err);
return err;
......@@ -742,6 +756,10 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock,
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
/* Block number less than F2FS MAX BLOCKS */
if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
return -EFBIG;
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_BMAP);
}
......@@ -761,10 +779,9 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
{
struct buffer_head map_bh;
sector_t start_blk, last_blk;
loff_t isize = i_size_read(inode);
loff_t isize;
u64 logical = 0, phys = 0, size = 0;
u32 flags = 0;
bool past_eof = false, whole_file = false;
int ret = 0;
ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
......@@ -779,16 +796,19 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
mutex_lock(&inode->i_mutex);
if (len >= isize) {
whole_file = true;
len = isize;
}
isize = i_size_read(inode);
if (start >= isize)
goto out;
if (start + len > isize)
len = isize - start;
if (logical_to_blk(inode, len) == 0)
len = blk_to_logical(inode, 1);
start_blk = logical_to_blk(inode, start);
last_blk = logical_to_blk(inode, start + len - 1);
next:
memset(&map_bh, 0, sizeof(struct buffer_head));
map_bh.b_size = len;
......@@ -800,41 +820,26 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
/* HOLE */
if (!buffer_mapped(&map_bh)) {
start_blk++;
if (!past_eof && blk_to_logical(inode, start_blk) >= isize)
past_eof = 1;
if (past_eof && size) {
/* Go through holes util pass the EOF */
if (blk_to_logical(inode, start_blk++) < isize)
goto prep_next;
/* Found a hole beyond isize means no more extents.
* Note that the premise is that filesystems don't
* punch holes beyond isize and keep size unchanged.
*/
flags |= FIEMAP_EXTENT_LAST;
ret = fiemap_fill_next_extent(fieinfo, logical,
phys, size, flags);
} else if (size) {
ret = fiemap_fill_next_extent(fieinfo, logical,
phys, size, flags);
size = 0;
}
/* if we have holes up to/past EOF then we're done */
if (start_blk > last_blk || past_eof || ret)
goto out;
} else {
if (start_blk > last_blk && !whole_file) {
if (size) {
if (f2fs_encrypted_inode(inode))
flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
ret = fiemap_fill_next_extent(fieinfo, logical,
phys, size, flags);
goto out;
}
/*
* if size != 0 then we know we already have an extent
* to add, so add it.
*/
if (size) {
ret = fiemap_fill_next_extent(fieinfo, logical,
phys, size, flags);
if (ret)
if (start_blk > last_blk || ret)
goto out;
}
logical = blk_to_logical(inode, start_blk);
phys = blk_to_logical(inode, map_bh.b_blocknr);
......@@ -845,14 +850,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
start_blk += logical_to_blk(inode, size);
/*
* If we are past the EOF, then we need to make sure as
* soon as we find a hole that the last extent we found
* is marked with FIEMAP_EXTENT_LAST
*/
if (!past_eof && logical + size >= isize)
past_eof = true;
}
prep_next:
cond_resched();
if (fatal_signal_pending(current))
ret = -EINTR;
......@@ -1083,6 +1081,7 @@ int do_write_data_page(struct f2fs_io_info *fio)
*/
if (unlikely(fio->blk_addr != NEW_ADDR &&
!is_cold_data(page) &&
!IS_ATOMIC_WRITTEN_PAGE(page) &&
need_inplace_update(inode))) {
rewrite_data_page(fio);
set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
......@@ -1179,10 +1178,11 @@ static int f2fs_write_data_page(struct page *page,
if (err)
ClearPageUptodate(page);
unlock_page(page);
if (need_balance_fs)
f2fs_balance_fs(sbi);
if (wbc->for_reclaim)
f2fs_balance_fs(sbi, need_balance_fs);
if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) {
f2fs_submit_merged_bio(sbi, DATA, WRITE);
remove_dirty_inode(inode);
}
return 0;
redirty_out:
......@@ -1354,6 +1354,10 @@ static int f2fs_write_data_pages(struct address_space *mapping,
available_free_memory(sbi, DIRTY_DENTS))
goto skip_write;
/* skip writing during file defragment */
if (is_inode_flag_set(F2FS_I(inode), FI_DO_DEFRAG))
goto skip_write;
/* during POR, we don't need to trigger writepage at all. */
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto skip_write;
......@@ -1369,7 +1373,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
if (locked)
mutex_unlock(&sbi->writepages);
remove_dirty_dir_inode(inode);
remove_dirty_inode(inode);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
return ret;
......@@ -1382,11 +1386,83 @@ static int f2fs_write_data_pages(struct address_space *mapping,
static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
struct inode *inode = mapping->host;
loff_t i_size = i_size_read(inode);
if (to > i_size) {
truncate_pagecache(inode, i_size);
truncate_blocks(inode, i_size, true);
}
}
static int prepare_write_begin(struct f2fs_sb_info *sbi,
struct page *page, loff_t pos, unsigned len,
block_t *blk_addr, bool *node_changed)
{
struct inode *inode = page->mapping->host;
pgoff_t index = page->index;
struct dnode_of_data dn;
struct page *ipage;
bool locked = false;
struct extent_info ei;
int err = 0;
if (to > inode->i_size) {
truncate_pagecache(inode, inode->i_size);
truncate_blocks(inode, inode->i_size, true);
if (f2fs_has_inline_data(inode) ||
(pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
f2fs_lock_op(sbi);
locked = true;
}
restart:
/* check inline_data */
ipage = get_node_page(sbi, inode->i_ino);
if (IS_ERR(ipage)) {
err = PTR_ERR(ipage);
goto unlock_out;
}
set_new_dnode(&dn, inode, ipage, ipage, 0);
if (f2fs_has_inline_data(inode)) {
if (pos + len <= MAX_INLINE_DATA) {
read_inline_data(page, ipage);
set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
sync_inode_page(&dn);
} else {
err = f2fs_convert_inline_page(&dn, page);
if (err)
goto out;
if (dn.data_blkaddr == NULL_ADDR)
err = f2fs_get_block(&dn, index);
}
} else if (locked) {
err = f2fs_get_block(&dn, index);
} else {
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
dn.data_blkaddr = ei.blk + index - ei.fofs;
} else {
bool restart = false;
/* hole case */
err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err || (!err && dn.data_blkaddr == NULL_ADDR))
restart = true;
if (restart) {
f2fs_put_dnode(&dn);
f2fs_lock_op(sbi);
locked = true;
goto restart;
}
}
}
/* convert_inline_page can make node_changed */
*blk_addr = dn.data_blkaddr;
*node_changed = dn.node_changed;
out:
f2fs_put_dnode(&dn);
unlock_out:
if (locked)
f2fs_unlock_op(sbi);
return err;
}
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
......@@ -1396,15 +1472,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *page = NULL;
struct page *ipage;
pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
struct dnode_of_data dn;
bool need_balance = false;
block_t blkaddr = NULL_ADDR;
int err = 0;
trace_f2fs_write_begin(inode, pos, len, flags);
f2fs_balance_fs(sbi);
/*
* We should check this at this moment to avoid deadlock on inode page
* and #0 page. The locking rule for inline_data conversion should be:
......@@ -1424,41 +1498,27 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
*pagep = page;
f2fs_lock_op(sbi);
/* check inline_data */
ipage = get_node_page(sbi, inode->i_ino);
if (IS_ERR(ipage)) {
err = PTR_ERR(ipage);
goto unlock_fail;
}
set_new_dnode(&dn, inode, ipage, ipage, 0);
err = prepare_write_begin(sbi, page, pos, len,
&blkaddr, &need_balance);
if (err)
goto fail;
if (f2fs_has_inline_data(inode)) {
if (pos + len <= MAX_INLINE_DATA) {
read_inline_data(page, ipage);
set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
sync_inode_page(&dn);
goto put_next;
if (need_balance && has_not_enough_free_secs(sbi, 0)) {
unlock_page(page);
f2fs_balance_fs(sbi, true);
lock_page(page);
if (page->mapping != mapping) {
/* The page got truncated from under us */
f2fs_put_page(page, 1);
goto repeat;
}
err = f2fs_convert_inline_page(&dn, page);
if (err)
goto put_fail;
}
err = f2fs_get_block(&dn, index);
if (err)
goto put_fail;
put_next:
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
f2fs_wait_on_page_writeback(page, DATA);
/* wait for GCed encrypted page writeback */
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);
if (len == PAGE_CACHE_SIZE)
goto out_update;
......@@ -1474,14 +1534,14 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
goto out_update;
}
if (dn.data_blkaddr == NEW_ADDR) {
if (blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
} else {
struct f2fs_io_info fio = {
.sbi = sbi,
.type = DATA,
.rw = READ_SYNC,
.blk_addr = dn.data_blkaddr,
.blk_addr = blkaddr,
.page = page,
.encrypted_page = NULL,
};
......@@ -1512,10 +1572,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
clear_cold_data(page);
return 0;
put_fail:
f2fs_put_dnode(&dn);
unlock_fail:
f2fs_unlock_op(sbi);
fail:
f2fs_put_page(page, 1);
f2fs_write_failed(mapping, pos + len);
......@@ -1540,6 +1596,7 @@ static int f2fs_write_end(struct file *file,
}
f2fs_put_page(page, 1);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return copied;
}
......@@ -1567,11 +1624,9 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
int err;
/* we don't need to use inline_data strictly */
if (f2fs_has_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
if (err)
return err;
}
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
return 0;
......@@ -1583,12 +1638,10 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
if (iov_iter_rw(iter) == WRITE) {
__allocate_data_blocks(inode, offset, count);
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
err = -EIO;
err = __allocate_data_blocks(inode, offset, count);
if (err)
goto out;
}
}
err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
out:
......
......@@ -38,12 +38,15 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree);
si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
si->total_ext = atomic64_read(&sbi->total_hit_ext);
si->ext_tree = sbi->total_ext_tree;
si->ext_tree = atomic_read(&sbi->total_ext_tree);
si->zombie_tree = atomic_read(&sbi->total_zombie_tree);
si->ext_node = atomic_read(&sbi->total_ext_node);
si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
si->ndirty_dirs = sbi->n_dirty_dirs;
si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA);
si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE];
si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->wb_pages = get_pages(sbi, F2FS_WRITEBACK);
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
......@@ -105,7 +108,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
bimodal = 0;
total_vblocks = 0;
blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg);
blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
hblks_per_sec = blks_per_sec / 2;
for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
......@@ -189,10 +192,10 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
sizeof(struct nat_entry_set);
si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
si->cache_mem += sbi->n_dirty_dirs * sizeof(struct inode_entry);
for (i = 0; i <= UPDATE_INO; i++)
si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
si->cache_mem += sbi->total_ext_tree * sizeof(struct extent_tree);
si->cache_mem += atomic_read(&sbi->total_ext_tree) *
sizeof(struct extent_tree);
si->cache_mem += atomic_read(&sbi->total_ext_node) *
sizeof(struct extent_node);
......@@ -267,7 +270,8 @@ static int stat_show(struct seq_file *s, void *v)
si->dirty_count);
seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n",
si->prefree_count, si->free_segs, si->free_secs);
seq_printf(s, "CP calls: %d\n", si->cp_count);
seq_printf(s, "CP calls: %d (BG: %d)\n",
si->cp_count, si->bg_cp_count);
seq_printf(s, "GC calls: %d (BG: %d)\n",
si->call_count, si->bg_gc);
seq_printf(s, " - data segments : %d (%d)\n",
......@@ -288,8 +292,8 @@ static int stat_show(struct seq_file *s, void *v)
!si->total_ext ? 0 :
div64_u64(si->hit_total * 100, si->total_ext),
si->hit_total, si->total_ext);
seq_printf(s, " - Inner Struct Count: tree: %d, node: %d\n",
si->ext_tree, si->ext_node);
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - inmem: %4d, wb: %4d\n",
si->inmem_pages, si->wb_pages);
......@@ -297,6 +301,8 @@ static int stat_show(struct seq_file *s, void *v)
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents: %4d in dirs:%4d\n",
si->ndirty_dent, si->ndirty_dirs);
seq_printf(s, " - datas: %4d in files:%4d\n",
si->ndirty_data, si->ndirty_files);
seq_printf(s, " - meta: %4d in %4d\n",
si->ndirty_meta, si->meta_pages);
seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n",
......@@ -404,20 +410,23 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
kfree(si);
}
void __init f2fs_create_root_stats(void)
int __init f2fs_create_root_stats(void)
{
struct dentry *file;
f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL);
if (!f2fs_debugfs_root)
return;
return -ENOMEM;
file = debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root,
NULL, &stat_fops);
if (!file) {
debugfs_remove(f2fs_debugfs_root);
f2fs_debugfs_root = NULL;
return -ENOMEM;
}
return 0;
}
void f2fs_destroy_root_stats(void)
......
......@@ -172,8 +172,6 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
namehash = f2fs_dentry_hash(&name);
f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
......@@ -238,6 +236,14 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
goto out;
max_depth = F2FS_I(dir)->i_current_depth;
if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING,
"Corrupted max_depth of %lu: %u",
dir->i_ino, max_depth);
max_depth = MAX_DIR_HASH_DEPTH;
F2FS_I(dir)->i_current_depth = max_depth;
mark_inode_dirty(dir);
}
for (level = 0; level < max_depth; level++) {
de = find_in_level(dir, level, &fname, res_page);
......@@ -444,7 +450,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
/* once the failed inode becomes a bad inode, i_mode is S_IFREG */
truncate_inode_pages(&inode->i_data, 0);
truncate_blocks(inode, 0, false);
remove_dirty_dir_inode(inode);
remove_dirty_inode(inode);
remove_inode_page(inode);
return ERR_PTR(err);
}
......@@ -630,6 +636,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
f2fs_put_page(dentry_page, 1);
out:
f2fs_fname_free_filename(&fname);
f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
return err;
}
......@@ -651,6 +658,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir)
clear_inode_flag(F2FS_I(inode), FI_NEW_INODE);
fail:
up_write(&F2FS_I(inode)->i_sem);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return err;
}
......@@ -695,6 +703,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
int i;
f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
if (f2fs_has_inline_dentry(dir))
return f2fs_delete_inline_entry(dentry, page, dir, inode);
......@@ -855,23 +865,25 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
for (; n < npages; n++) {
dentry_page = get_lock_data_page(inode, n, false);
if (IS_ERR(dentry_page))
if (IS_ERR(dentry_page)) {
err = PTR_ERR(dentry_page);
if (err == -ENOENT)
continue;
else
goto out;
}
dentry_blk = kmap(dentry_page);
make_dentry_ptr(inode, &d, (void *)dentry_blk, 1);
if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr))
goto stop;
ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK;
if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr)) {
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
dentry_page = NULL;
break;
}
stop:
if (dentry_page && !IS_ERR(dentry_page)) {
ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK;
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
}
......
......@@ -36,7 +36,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
rb_link_node(&en->rb_node, parent, p);
rb_insert_color(&en->rb_node, &et->root);
et->count++;
atomic_inc(&et->node_cnt);
atomic_inc(&sbi->total_ext_node);
return en;
}
......@@ -45,7 +45,7 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_node *en)
{
rb_erase(&en->rb_node, &et->root);
et->count--;
atomic_dec(&et->node_cnt);
atomic_dec(&sbi->total_ext_node);
if (et->cached_en == en)
......@@ -68,11 +68,13 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
et->root = RB_ROOT;
et->cached_en = NULL;
rwlock_init(&et->lock);
atomic_set(&et->refcount, 0);
et->count = 0;
sbi->total_ext_tree++;
INIT_LIST_HEAD(&et->list);
atomic_set(&et->node_cnt, 0);
atomic_inc(&sbi->total_ext_tree);
} else {
atomic_dec(&sbi->total_zombie_tree);
list_del_init(&et->list);
}
atomic_inc(&et->refcount);
up_write(&sbi->extent_tree_lock);
/* never died until evict_inode */
......@@ -131,7 +133,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
{
struct rb_node *node, *next;
struct extent_node *en;
unsigned int count = et->count;
unsigned int count = atomic_read(&et->node_cnt);
node = rb_first(&et->root);
while (node) {
......@@ -152,7 +154,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
node = next;
}
return count - et->count;
return count - atomic_read(&et->node_cnt);
}
static void __drop_largest_extent(struct inode *inode,
......@@ -164,34 +166,33 @@ static void __drop_largest_extent(struct inode *inode,
largest->len = 0;
}
void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs)
{
if (!f2fs_may_extent_tree(inode))
return;
__drop_largest_extent(inode, fofs, 1);
}
void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
/* return true, if inode page is changed */
bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
struct extent_node *en;
struct extent_info ei;
if (!f2fs_may_extent_tree(inode))
return;
if (!f2fs_may_extent_tree(inode)) {
/* drop largest extent */
if (i_ext && i_ext->len) {
i_ext->len = 0;
return true;
}
return false;
}
et = __grab_extent_tree(inode);
if (!i_ext || le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
return;
if (!i_ext || !i_ext->len)
return false;
set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
write_lock(&et->lock);
if (et->count)
if (atomic_read(&et->node_cnt))
goto out;
en = __init_extent_tree(sbi, et, &ei);
......@@ -202,6 +203,7 @@ void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
}
out:
write_unlock(&et->lock);
return false;
}
static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
......@@ -549,45 +551,44 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
struct extent_tree *et, *next;
struct extent_node *en, *tmp;
unsigned long ino = F2FS_ROOT_INO(sbi);
struct radix_tree_root *root = &sbi->extent_tree_root;
unsigned int found;
unsigned int node_cnt = 0, tree_cnt = 0;
int remained;
bool do_free = false;
if (!test_opt(sbi, EXTENT_CACHE))
return 0;
if (!atomic_read(&sbi->total_zombie_tree))
goto free_node;
if (!down_write_trylock(&sbi->extent_tree_lock))
goto out;
/* 1. remove unreferenced extent tree */
while ((found = radix_tree_gang_lookup(root,
(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
unsigned i;
ino = treevec[found - 1]->ino + 1;
for (i = 0; i < found; i++) {
struct extent_tree *et = treevec[i];
if (!atomic_read(&et->refcount)) {
list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
if (atomic_read(&et->node_cnt)) {
write_lock(&et->lock);
node_cnt += __free_extent_tree(sbi, et, true);
write_unlock(&et->lock);
}
radix_tree_delete(root, et->ino);
list_del_init(&et->list);
radix_tree_delete(&sbi->extent_tree_root, et->ino);
kmem_cache_free(extent_tree_slab, et);
sbi->total_ext_tree--;
atomic_dec(&sbi->total_ext_tree);
atomic_dec(&sbi->total_zombie_tree);
tree_cnt++;
if (node_cnt + tree_cnt >= nr_shrink)
goto unlock_out;
}
}
}
up_write(&sbi->extent_tree_lock);
free_node:
/* 2. remove LRU extent entries */
if (!down_write_trylock(&sbi->extent_tree_lock))
goto out;
......@@ -599,15 +600,19 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
if (!remained--)
break;
list_del_init(&en->list);
do_free = true;
}
spin_unlock(&sbi->extent_lock);
if (do_free == false)
goto unlock_out;
/*
* reset ino for searching victims from beginning of global extent tree.
*/
ino = F2FS_ROOT_INO(sbi);
while ((found = radix_tree_gang_lookup(root,
while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
unsigned i;
......@@ -615,9 +620,13 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
for (i = 0; i < found; i++) {
struct extent_tree *et = treevec[i];
write_lock(&et->lock);
if (!atomic_read(&et->node_cnt))
continue;
if (write_trylock(&et->lock)) {
node_cnt += __free_extent_tree(sbi, et, false);
write_unlock(&et->lock);
}
if (node_cnt + tree_cnt >= nr_shrink)
goto unlock_out;
......@@ -637,7 +646,7 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode)
struct extent_tree *et = F2FS_I(inode)->extent_tree;
unsigned int node_cnt = 0;
if (!et)
if (!et || !atomic_read(&et->node_cnt))
return 0;
write_lock(&et->lock);
......@@ -656,8 +665,12 @@ void f2fs_destroy_extent_tree(struct inode *inode)
if (!et)
return;
if (inode->i_nlink && !is_bad_inode(inode) && et->count) {
atomic_dec(&et->refcount);
if (inode->i_nlink && !is_bad_inode(inode) &&
atomic_read(&et->node_cnt)) {
down_write(&sbi->extent_tree_lock);
list_add_tail(&et->list, &sbi->zombie_list);
atomic_inc(&sbi->total_zombie_tree);
up_write(&sbi->extent_tree_lock);
return;
}
......@@ -666,11 +679,10 @@ void f2fs_destroy_extent_tree(struct inode *inode)
/* delete extent tree entry in radix tree */
down_write(&sbi->extent_tree_lock);
atomic_dec(&et->refcount);
f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
kmem_cache_free(extent_tree_slab, et);
sbi->total_ext_tree--;
atomic_dec(&sbi->total_ext_tree);
up_write(&sbi->extent_tree_lock);
F2FS_I(inode)->extent_tree = NULL;
......@@ -722,7 +734,9 @@ void init_extent_cache_info(struct f2fs_sb_info *sbi)
init_rwsem(&sbi->extent_tree_lock);
INIT_LIST_HEAD(&sbi->extent_list);
spin_lock_init(&sbi->extent_lock);
sbi->total_ext_tree = 0;
atomic_set(&sbi->total_ext_tree, 0);
INIT_LIST_HEAD(&sbi->zombie_list);
atomic_set(&sbi->total_zombie_tree, 0);
atomic_set(&sbi->total_ext_node, 0);
}
......
......@@ -21,6 +21,7 @@
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#ifdef CONFIG_F2FS_CHECK_FS
#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
......@@ -54,6 +55,7 @@
#define F2FS_MOUNT_FASTBOOT 0x00001000
#define F2FS_MOUNT_EXTENT_CACHE 0x00002000
#define F2FS_MOUNT_FORCE_FG_GC 0x00004000
#define F2FS_MOUNT_DATA_FLUSH 0x00008000
#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option)
#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option)
......@@ -125,6 +127,7 @@ enum {
#define BATCHED_TRIM_BLOCKS(sbi) \
(BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg)
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 120 /* 2 mins */
struct cp_control {
int reason;
......@@ -158,13 +161,7 @@ struct ino_entry {
nid_t ino; /* inode number */
};
/*
* for the list of directory inodes or gc inodes.
* NOTE: there are two slab users for this structure, if we add/modify/delete
* fields in structure for one of slab users, it may affect fields or size of
* other one, in this condition, it's better to split both of slab and related
* data structure.
*/
/* for the list of inodes to be GCed */
struct inode_entry {
struct list_head list; /* list head */
struct inode *inode; /* vfs inode pointer */
......@@ -234,6 +231,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
#define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6)
#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7)
#define F2FS_IOC_DEFRAGMENT _IO(F2FS_IOCTL_MAGIC, 8)
#define F2FS_IOC_SET_ENCRYPTION_POLICY \
_IOR('f', 19, struct f2fs_encryption_policy)
......@@ -258,8 +256,14 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
*/
#define F2FS_IOC32_GETFLAGS FS_IOC32_GETFLAGS
#define F2FS_IOC32_SETFLAGS FS_IOC32_SETFLAGS
#define F2FS_IOC32_GETVERSION FS_IOC32_GETVERSION
#endif
struct f2fs_defragment {
u64 start;
u64 len;
};
/*
* For INODE and NODE manager
*/
......@@ -357,9 +361,9 @@ struct extent_tree {
struct rb_root root; /* root of extent info rb-tree */
struct extent_node *cached_en; /* recently accessed extent node */
struct extent_info largest; /* largested extent info */
struct list_head list; /* to be used by sbi->zombie_list */
rwlock_t lock; /* protect extent info rb-tree */
atomic_t refcount; /* reference count of rb-tree */
unsigned int count; /* # of extent node in rb-tree*/
atomic_t node_cnt; /* # of extent node in rb-tree*/
};
/*
......@@ -434,8 +438,8 @@ struct f2fs_inode_info {
unsigned int clevel; /* maximum level of given file name */
nid_t i_xattr_nid; /* node id that contains xattrs */
unsigned long long xattr_ver; /* cp version of xattr modification */
struct inode_entry *dirty_dir; /* the pointer of dirty dir */
struct list_head dirty_list; /* linked in global dirty list */
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct mutex inmem_lock; /* lock for inmemory pages */
......@@ -544,6 +548,7 @@ struct dnode_of_data {
nid_t nid; /* node id of the direct node block */
unsigned int ofs_in_node; /* data offset in the node page */
bool inode_page_locked; /* inode page is locked or not */
bool node_changed; /* is node block changed */
block_t data_blkaddr; /* block address of the node block */
};
......@@ -647,6 +652,7 @@ struct f2fs_sm_info {
enum count_type {
F2FS_WRITEBACK,
F2FS_DIRTY_DENTS,
F2FS_DIRTY_DATA,
F2FS_DIRTY_NODES,
F2FS_DIRTY_META,
F2FS_INMEM_PAGES,
......@@ -695,6 +701,12 @@ struct f2fs_bio_info {
struct rw_semaphore io_rwsem; /* blocking op for bio */
};
enum inode_type {
DIR_INODE, /* for dirty dir inode */
FILE_INODE, /* for dirty regular/symlink inode */
NR_INODE_TYPE,
};
/* for inner inode cache management */
struct inode_management {
struct radix_tree_root ino_root; /* ino entry array */
......@@ -711,11 +723,17 @@ enum {
SBI_POR_DOING, /* recovery is doing or not */
};
enum {
CP_TIME,
REQ_TIME,
MAX_TIME,
};
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
struct proc_dir_entry *s_proc; /* proc entry */
struct buffer_head *raw_super_buf; /* buffer head of raw sb */
struct f2fs_super_block *raw_super; /* raw super block pointer */
int valid_super_block; /* valid super block no */
int s_flag; /* flags for sbi */
/* for node-related operations */
......@@ -737,23 +755,26 @@ struct f2fs_sb_info {
struct rw_semaphore node_write; /* locking node writes */
struct mutex writepages; /* mutex for writepages() */
wait_queue_head_t cp_wait;
long cp_expires, cp_interval; /* next expected periodic cp */
unsigned long last_time[MAX_TIME]; /* to store time in jiffies */
long interval_time[MAX_TIME]; /* to store thresholds */
struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */
/* for orphan inode, use 0'th array */
unsigned int max_orphans; /* max orphan inodes */
/* for directory inode management */
struct list_head dir_inode_list; /* dir inode list */
spinlock_t dir_inode_lock; /* for dir inode list lock */
/* for inode management */
struct list_head inode_list[NR_INODE_TYPE]; /* dirty inode list */
spinlock_t inode_lock[NR_INODE_TYPE]; /* for dirty inode list lock */
/* for extent tree cache */
struct radix_tree_root extent_tree_root;/* cache extent cache entries */
struct rw_semaphore extent_tree_lock; /* locking extent radix tree */
struct list_head extent_list; /* lru list for shrinker */
spinlock_t extent_lock; /* locking extent lru list */
int total_ext_tree; /* extent tree count */
atomic_t total_ext_tree; /* extent tree count */
struct list_head zombie_list; /* extent zombie tree list */
atomic_t total_zombie_tree; /* extent zombie tree count */
atomic_t total_ext_node; /* extent info count */
/* basic filesystem units */
......@@ -771,6 +792,7 @@ struct f2fs_sb_info {
unsigned int total_node_count; /* total node block count */
unsigned int total_valid_node_count; /* valid node block count */
unsigned int total_valid_inode_count; /* valid inode count */
loff_t max_file_blocks; /* max block index of file */
int active_logs; /* # of active logs */
int dir_level; /* directory level */
......@@ -809,7 +831,7 @@ struct f2fs_sb_info {
atomic_t inline_inode; /* # of inline_data inodes */
atomic_t inline_dir; /* # of inline_dentry inodes */
int bg_gc; /* background gc calls */
unsigned int n_dirty_dirs; /* # of dir inodes */
unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */
#endif
unsigned int last_victim[2]; /* last victim segment # */
spinlock_t stat_lock; /* lock for stat operations */
......@@ -824,6 +846,31 @@ struct f2fs_sb_info {
unsigned int shrinker_run_no;
};
static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
{
sbi->last_time[type] = jiffies;
}
static inline bool f2fs_time_over(struct f2fs_sb_info *sbi, int type)
{
struct timespec ts = {sbi->interval_time[type], 0};
unsigned long interval = timespec_to_jiffies(&ts);
return time_after(jiffies, sbi->last_time[type] + interval);
}
static inline bool is_idle(struct f2fs_sb_info *sbi)
{
struct block_device *bdev = sbi->sb->s_bdev;
struct request_queue *q = bdev_get_queue(bdev);
struct request_list *rl = &q->root_rl;
if (rl->count[BLK_RW_SYNC] || rl->count[BLK_RW_ASYNC])
return 0;
return f2fs_time_over(sbi, REQ_TIME);
}
/*
* Inline functions
*/
......@@ -1059,8 +1106,8 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
static inline void inode_inc_dirty_pages(struct inode *inode)
{
atomic_inc(&F2FS_I(inode)->dirty_pages);
if (S_ISDIR(inode->i_mode))
inc_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS);
inc_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
}
static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
......@@ -1075,9 +1122,8 @@ static inline void inode_dec_dirty_pages(struct inode *inode)
return;
atomic_dec(&F2FS_I(inode)->dirty_pages);
if (S_ISDIR(inode->i_mode))
dec_page_count(F2FS_I_SB(inode), F2FS_DIRTY_DENTS);
dec_page_count(F2FS_I_SB(inode), S_ISDIR(inode->i_mode) ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA);
}
static inline int get_pages(struct f2fs_sb_info *sbi, int count_type)
......@@ -1092,8 +1138,7 @@ static inline int get_dirty_pages(struct inode *inode)
static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type)
{
unsigned int pages_per_sec = sbi->segs_per_sec *
(1 << sbi->log_blocks_per_seg);
unsigned int pages_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg;
return ((get_pages(sbi, block_type) + pages_per_sec - 1)
>> sbi->log_blocks_per_seg) / sbi->segs_per_sec;
}
......@@ -1416,6 +1461,8 @@ enum {
FI_DROP_CACHE, /* drop dirty page cache */
FI_DATA_EXIST, /* indicate data exists */
FI_INLINE_DOTS, /* indicate inline dot dentries */
FI_DO_DEFRAG, /* indicate defragment is running */
FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
};
static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag)
......@@ -1659,8 +1706,8 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long);
void f2fs_set_inode_flags(struct inode *);
struct inode *f2fs_iget(struct super_block *, unsigned long);
int try_to_free_nats(struct f2fs_sb_info *, int);
void update_inode(struct inode *, struct page *);
void update_inode_page(struct inode *);
int update_inode(struct inode *, struct page *);
int update_inode_page(struct inode *);
int f2fs_write_inode(struct inode *, struct writeback_control *);
void f2fs_evict_inode(struct inode *);
void handle_failed_inode(struct inode *);
......@@ -1765,7 +1812,7 @@ void destroy_node_manager_caches(void);
*/
void register_inmem_page(struct inode *, struct page *);
int commit_inmem_pages(struct inode *, bool);
void f2fs_balance_fs(struct f2fs_sb_info *);
void f2fs_balance_fs(struct f2fs_sb_info *, bool);
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
int f2fs_issue_flush(struct f2fs_sb_info *);
int create_flush_cmd_control(struct f2fs_sb_info *);
......@@ -1811,9 +1858,9 @@ bool is_valid_blkaddr(struct f2fs_sb_info *, block_t, int);
int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int, bool);
void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t);
long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long);
void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type);
void release_dirty_inode(struct f2fs_sb_info *);
void add_ino_entry(struct f2fs_sb_info *, nid_t, int type);
void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type);
void release_ino_entry(struct f2fs_sb_info *);
bool exist_written_data(struct f2fs_sb_info *, nid_t, int);
int acquire_orphan_inode(struct f2fs_sb_info *);
void release_orphan_inode(struct f2fs_sb_info *);
......@@ -1823,9 +1870,9 @@ int recover_orphan_inodes(struct f2fs_sb_info *);
int get_valid_checkpoint(struct f2fs_sb_info *);
void update_dirty_page(struct inode *, struct page *);
void add_dirty_dir_inode(struct inode *);
void remove_dirty_dir_inode(struct inode *);
void sync_dirty_dir_inodes(struct f2fs_sb_info *);
void write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
void remove_dirty_inode(struct inode *);
int sync_dirty_inodes(struct f2fs_sb_info *, enum inode_type);
int write_checkpoint(struct f2fs_sb_info *, struct cp_control *);
void init_ino_entry_info(struct f2fs_sb_info *);
int __init create_checkpoint_caches(void);
void destroy_checkpoint_caches(void);
......@@ -1845,6 +1892,7 @@ struct page *find_data_page(struct inode *, pgoff_t);
struct page *get_lock_data_page(struct inode *, pgoff_t, bool);
struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
int do_write_data_page(struct f2fs_io_info *);
int f2fs_map_blocks(struct inode *, struct f2fs_map_blocks *, int, int);
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
int f2fs_release_page(struct page *, gfp_t);
......@@ -1875,8 +1923,9 @@ struct f2fs_stat_info {
int main_area_segs, main_area_sections, main_area_zones;
unsigned long long hit_largest, hit_cached, hit_rbtree;
unsigned long long hit_total, total_ext;
int ext_tree, ext_node;
int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
int ext_tree, zombie_tree, ext_node;
int ndirty_node, ndirty_meta;
int ndirty_dent, ndirty_dirs, ndirty_data, ndirty_files;
int nats, dirty_nats, sits, dirty_sits, fnids;
int total_count, utilization;
int bg_gc, inmem_pages, wb_pages;
......@@ -1886,7 +1935,7 @@ struct f2fs_stat_info {
int util_free, util_valid, util_invalid;
int rsvd_segs, overp_segs;
int dirty_count, node_pages, meta_pages;
int prefree_count, call_count, cp_count;
int prefree_count, call_count, cp_count, bg_cp_count;
int tot_segs, node_segs, data_segs, free_segs, free_secs;
int bg_node_segs, bg_data_segs;
int tot_blks, data_blks, node_blks;
......@@ -1907,10 +1956,11 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
}
#define stat_inc_cp_count(si) ((si)->cp_count++)
#define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++)
#define stat_inc_call_count(si) ((si)->call_count++)
#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--)
#define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++)
#define stat_dec_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]--)
#define stat_inc_total_hit(sbi) (atomic64_inc(&(sbi)->total_hit_ext))
#define stat_inc_rbtree_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_rbtree))
#define stat_inc_largest_node_hit(sbi) (atomic64_inc(&(sbi)->read_hit_largest))
......@@ -1985,14 +2035,15 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
int f2fs_build_stats(struct f2fs_sb_info *);
void f2fs_destroy_stats(struct f2fs_sb_info *);
void __init f2fs_create_root_stats(void);
int __init f2fs_create_root_stats(void);
void f2fs_destroy_root_stats(void);
#else
#define stat_inc_cp_count(si)
#define stat_inc_bg_cp_count(si)
#define stat_inc_call_count(si)
#define stat_inc_bggc_count(si)
#define stat_inc_dirty_dir(sbi)
#define stat_dec_dirty_dir(sbi)
#define stat_inc_dirty_inode(sbi, type)
#define stat_dec_dirty_inode(sbi, type)
#define stat_inc_total_hit(sb)
#define stat_inc_rbtree_node_hit(sb)
#define stat_inc_largest_node_hit(sbi)
......@@ -2013,7 +2064,7 @@ void f2fs_destroy_root_stats(void);
static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
static inline void __init f2fs_create_root_stats(void) { }
static inline int __init f2fs_create_root_stats(void) { return 0; }
static inline void f2fs_destroy_root_stats(void) { }
#endif
......@@ -2067,8 +2118,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *);
* extent_cache.c
*/
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
void f2fs_drop_largest_extent(struct inode *, pgoff_t);
void f2fs_init_extent_tree(struct inode *, struct f2fs_extent *);
bool f2fs_init_extent_tree(struct inode *, struct f2fs_extent *);
unsigned int f2fs_destroy_extent_node(struct inode *);
void f2fs_destroy_extent_tree(struct inode *);
bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *);
......
......@@ -40,8 +40,6 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
struct dnode_of_data dn;
int err;
f2fs_balance_fs(sbi);
sb_start_pagefault(inode->i_sb);
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
......@@ -57,6 +55,8 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
f2fs_balance_fs(sbi, dn.node_changed);
file_update_time(vma->vm_file);
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping ||
......@@ -96,6 +96,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
clear_cold_data(page);
out:
sb_end_pagefault(inode->i_sb);
f2fs_update_time(sbi, REQ_TIME);
return block_page_mkwrite_return(err);
}
......@@ -201,7 +202,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trace_f2fs_sync_file_enter(inode);
/* if fdatasync is triggered, let's do in-place-update */
if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
set_inode_flag(fi, FI_NEED_IPU);
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
clear_inode_flag(fi, FI_NEED_IPU);
......@@ -233,9 +234,6 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
}
go_write:
/* guarantee free sections for fsync */
f2fs_balance_fs(sbi);
/*
* Both of fdatasync() and fsync() are able to be recovered from
* sudden-power-off.
......@@ -261,8 +259,10 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
sync_node_pages(sbi, ino, &wbc);
/* if cp_error was enabled, we should avoid infinite loop */
if (unlikely(f2fs_cp_error(sbi)))
if (unlikely(f2fs_cp_error(sbi))) {
ret = -EIO;
goto out;
}
if (need_inode_block_update(sbi, ino)) {
mark_inode_dirty_sync(inode);
......@@ -275,12 +275,13 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
/* once recovery info is written, don't need to tack this */
remove_dirty_inode(sbi, ino, APPEND_INO);
remove_ino_entry(sbi, ino, APPEND_INO);
clear_inode_flag(fi, FI_APPEND_WRITE);
flush_out:
remove_dirty_inode(sbi, ino, UPDATE_INO);
remove_ino_entry(sbi, ino, UPDATE_INO);
clear_inode_flag(fi, FI_UPDATE_WRITE);
ret = f2fs_issue_flush(sbi);
f2fs_update_time(sbi, REQ_TIME);
out:
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
f2fs_trace_ios(NULL, 1);
......@@ -418,19 +419,18 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file_inode(file);
int err;
if (f2fs_encrypted_inode(inode)) {
int err = f2fs_get_encryption_info(inode);
err = f2fs_get_encryption_info(inode);
if (err)
return 0;
}
/* we don't need to use inline_data strictly */
if (f2fs_has_inline_data(inode)) {
int err = f2fs_convert_inline_inode(inode);
err = f2fs_convert_inline_inode(inode);
if (err)
return err;
}
file_accessed(file);
vma->vm_ops = &f2fs_file_vm_ops;
......@@ -483,11 +483,11 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
F2FS_I(dn->inode)) + ofs;
f2fs_update_extent_cache_range(dn, fofs, 0, len);
dec_valid_block_count(sbi, dn->inode, nr_free);
set_page_dirty(dn->node_page);
sync_inode_page(dn);
}
dn->ofs_in_node = ofs;
f2fs_update_time(sbi, REQ_TIME);
trace_f2fs_truncate_data_blocks_range(dn->inode, dn->nid,
dn->ofs_in_node, nr_free);
return nr_free;
......@@ -604,7 +604,7 @@ int f2fs_truncate(struct inode *inode, bool lock)
trace_f2fs_truncate(inode);
/* we should check inline_data size */
if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) {
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
if (err)
return err;
......@@ -679,13 +679,20 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
err = f2fs_truncate(inode, true);
if (err)
return err;
f2fs_balance_fs(F2FS_I_SB(inode));
f2fs_balance_fs(F2FS_I_SB(inode), true);
} else {
/*
* do not trim all blocks after i_size if target size is
* larger than i_size.
*/
truncate_setsize(inode, attr->ia_size);
/* should convert inline inode here */
if (!f2fs_may_inline_data(inode)) {
err = f2fs_convert_inline_inode(inode);
if (err)
return err;
}
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
}
}
......@@ -727,7 +734,7 @@ static int fill_zero(struct inode *inode, pgoff_t index,
if (!len)
return 0;
f2fs_balance_fs(sbi);
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
page = get_new_data_page(inode, NULL, index, false);
......@@ -778,13 +785,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
pgoff_t pg_start, pg_end;
loff_t off_start, off_end;
int ret = 0;
int ret;
if (f2fs_has_inline_data(inode)) {
ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
}
pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
......@@ -815,7 +820,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
loff_t blk_start, blk_end;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
f2fs_balance_fs(sbi);
f2fs_balance_fs(sbi, true);
blk_start = (loff_t)pg_start << PAGE_CACHE_SHIFT;
blk_end = (loff_t)pg_end << PAGE_CACHE_SHIFT;
......@@ -918,7 +923,7 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
int ret = 0;
for (; end < nrpages; start++, end++) {
f2fs_balance_fs(sbi);
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
ret = __exchange_data_block(inode, end, start, true);
f2fs_unlock_op(sbi);
......@@ -941,13 +946,9 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
return -EINVAL;
f2fs_balance_fs(F2FS_I_SB(inode));
if (f2fs_has_inline_data(inode)) {
ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
}
pg_start = offset >> PAGE_CACHE_SHIFT;
pg_end = (offset + len) >> PAGE_CACHE_SHIFT;
......@@ -991,13 +992,9 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
f2fs_balance_fs(sbi);
if (f2fs_has_inline_data(inode)) {
ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
}
ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
if (ret)
......@@ -1104,13 +1101,11 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
return -EINVAL;
f2fs_balance_fs(sbi);
if (f2fs_has_inline_data(inode)) {
ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
}
f2fs_balance_fs(sbi, true);
ret = truncate_blocks(inode, i_size_read(inode), true);
if (ret)
......@@ -1154,17 +1149,15 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
loff_t off_start, off_end;
int ret = 0;
f2fs_balance_fs(sbi);
ret = inode_newsize_ok(inode, (len + offset));
if (ret)
return ret;
if (f2fs_has_inline_data(inode)) {
ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
}
f2fs_balance_fs(sbi, true);
pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
......@@ -1246,6 +1239,7 @@ static long f2fs_fallocate(struct file *file, int mode,
if (!ret) {
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
}
out:
......@@ -1353,8 +1347,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
if (!inode_owner_or_capable(inode))
return -EACCES;
f2fs_balance_fs(F2FS_I_SB(inode));
if (f2fs_is_atomic_file(inode))
return 0;
......@@ -1363,6 +1355,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
return ret;
set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return 0;
}
......@@ -1384,9 +1378,11 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
if (f2fs_is_atomic_file(inode)) {
clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
ret = commit_inmem_pages(inode, false);
if (ret)
if (ret) {
set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
goto err_out;
}
}
ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
err_out:
......@@ -1410,6 +1406,7 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
return ret;
set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return 0;
}
......@@ -1441,13 +1438,17 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
if (ret)
return ret;
f2fs_balance_fs(F2FS_I_SB(inode));
if (f2fs_is_atomic_file(inode)) {
clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
commit_inmem_pages(inode, true);
}
if (f2fs_is_volatile_file(inode)) {
clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
}
mnt_drop_write_file(filp);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return ret;
}
......@@ -1487,6 +1488,7 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
default:
return -EINVAL;
}
f2fs_update_time(sbi, REQ_TIME);
return 0;
}
......@@ -1517,6 +1519,7 @@ static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg)
if (copy_to_user((struct fstrim_range __user *)arg, &range,
sizeof(range)))
return -EFAULT;
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return 0;
}
......@@ -1540,6 +1543,7 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
sizeof(policy)))
return -EFAULT;
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return f2fs_process_policy(&policy, inode);
#else
return -EOPNOTSUPP;
......@@ -1586,13 +1590,13 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
generate_random_uuid(sbi->raw_super->encrypt_pw_salt);
err = f2fs_commit_super(sbi, false);
mnt_drop_write_file(filp);
if (err) {
/* undo new data */
memset(sbi->raw_super->encrypt_pw_salt, 0, 16);
mnt_drop_write_file(filp);
return err;
}
mnt_drop_write_file(filp);
got_it:
if (copy_to_user((__u8 __user *)arg, sbi->raw_super->encrypt_pw_salt,
16))
......@@ -1629,7 +1633,6 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct cp_control cpc;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
......@@ -1637,13 +1640,196 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
if (f2fs_readonly(sbi->sb))
return -EROFS;
cpc.reason = __get_cp_reason(sbi);
return f2fs_sync_fs(sbi->sb, 1);
}
mutex_lock(&sbi->gc_mutex);
write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
struct file *filp,
struct f2fs_defragment *range)
{
struct inode *inode = file_inode(filp);
struct f2fs_map_blocks map;
struct extent_info ei;
pgoff_t pg_start, pg_end;
unsigned int blk_per_seg = sbi->blocks_per_seg;
unsigned int total = 0, sec_num;
unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg;
block_t blk_end = 0;
bool fragmented = false;
int err;
return 0;
/* if in-place-update policy is enabled, don't waste time here */
if (need_inplace_update(inode))
return -EINVAL;
pg_start = range->start >> PAGE_CACHE_SHIFT;
pg_end = (range->start + range->len) >> PAGE_CACHE_SHIFT;
f2fs_balance_fs(sbi, true);
mutex_lock(&inode->i_mutex);
/* writeback all dirty pages in the range */
err = filemap_write_and_wait_range(inode->i_mapping, range->start,
range->start + range->len - 1);
if (err)
goto out;
/*
* lookup mapping info in extent cache, skip defragmenting if physical
* block addresses are continuous.
*/
if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) {
if (ei.fofs + ei.len >= pg_end)
goto out;
}
map.m_lblk = pg_start;
/*
* lookup mapping info in dnode page cache, skip defragmenting if all
* physical block addresses are continuous even if there are hole(s)
* in logical blocks.
*/
while (map.m_lblk < pg_end) {
map.m_len = pg_end - map.m_lblk;
err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ);
if (err)
goto out;
if (!(map.m_flags & F2FS_MAP_FLAGS)) {
map.m_lblk++;
continue;
}
if (blk_end && blk_end != map.m_pblk) {
fragmented = true;
break;
}
blk_end = map.m_pblk + map.m_len;
map.m_lblk += map.m_len;
}
if (!fragmented)
goto out;
map.m_lblk = pg_start;
map.m_len = pg_end - pg_start;
sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec;
/*
* make sure there are enough free section for LFS allocation, this can
* avoid defragment running in SSR mode when free section are allocated
* intensively
*/
if (has_not_enough_free_secs(sbi, sec_num)) {
err = -EAGAIN;
goto out;
}
while (map.m_lblk < pg_end) {
pgoff_t idx;
int cnt = 0;
do_map:
map.m_len = pg_end - map.m_lblk;
err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_READ);
if (err)
goto clear_out;
if (!(map.m_flags & F2FS_MAP_FLAGS)) {
map.m_lblk++;
continue;
}
set_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
idx = map.m_lblk;
while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) {
struct page *page;
page = get_lock_data_page(inode, idx, true);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto clear_out;
}
set_page_dirty(page);
f2fs_put_page(page, 1);
idx++;
cnt++;
total++;
}
map.m_lblk = idx;
if (idx < pg_end && cnt < blk_per_seg)
goto do_map;
clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
err = filemap_fdatawrite(inode->i_mapping);
if (err)
goto out;
}
clear_out:
clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG);
out:
mutex_unlock(&inode->i_mutex);
if (!err)
range->len = (u64)total << PAGE_CACHE_SHIFT;
return err;
}
static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_defragment range;
int err;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
err = mnt_want_write_file(filp);
if (err)
return err;
if (f2fs_readonly(sbi->sb)) {
err = -EROFS;
goto out;
}
if (copy_from_user(&range, (struct f2fs_defragment __user *)arg,
sizeof(range))) {
err = -EFAULT;
goto out;
}
/* verify alignment of offset & size */
if (range.start & (F2FS_BLKSIZE - 1) ||
range.len & (F2FS_BLKSIZE - 1)) {
err = -EINVAL;
goto out;
}
err = f2fs_defragment_range(sbi, filp, &range);
f2fs_update_time(sbi, REQ_TIME);
if (err < 0)
goto out;
if (copy_to_user((struct f2fs_defragment __user *)arg, &range,
sizeof(range)))
err = -EFAULT;
out:
mnt_drop_write_file(filp);
return err;
}
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
......@@ -1679,6 +1865,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_gc(filp, arg);
case F2FS_IOC_WRITE_CHECKPOINT:
return f2fs_ioc_write_checkpoint(filp, arg);
case F2FS_IOC_DEFRAGMENT:
return f2fs_ioc_defragment(filp, arg);
default:
return -ENOTTY;
}
......@@ -1706,6 +1894,22 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC32_SETFLAGS:
cmd = F2FS_IOC_SETFLAGS;
break;
case F2FS_IOC32_GETVERSION:
cmd = F2FS_IOC_GETVERSION;
break;
case F2FS_IOC_START_ATOMIC_WRITE:
case F2FS_IOC_COMMIT_ATOMIC_WRITE:
case F2FS_IOC_START_VOLATILE_WRITE:
case F2FS_IOC_RELEASE_VOLATILE_WRITE:
case F2FS_IOC_ABORT_VOLATILE_WRITE:
case F2FS_IOC_SHUTDOWN:
case F2FS_IOC_SET_ENCRYPTION_POLICY:
case F2FS_IOC_GET_ENCRYPTION_PWSALT:
case F2FS_IOC_GET_ENCRYPTION_POLICY:
case F2FS_IOC_GARBAGE_COLLECT:
case F2FS_IOC_WRITE_CHECKPOINT:
case F2FS_IOC_DEFRAGMENT:
break;
default:
return -ENOIOCTLCMD;
}
......
......@@ -16,7 +16,6 @@
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/blkdev.h>
#include "f2fs.h"
#include "node.h"
......@@ -173,9 +172,9 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
{
/* SSR allocates in a segment unit */
if (p->alloc_mode == SSR)
return 1 << sbi->log_blocks_per_seg;
return sbi->blocks_per_seg;
if (p->gc_mode == GC_GREEDY)
return (1 << sbi->log_blocks_per_seg) * p->ofs_unit;
return sbi->blocks_per_seg * p->ofs_unit;
else if (p->gc_mode == GC_CB)
return UINT_MAX;
else /* No other gc_mode */
......@@ -832,8 +831,10 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
goto stop;
if (unlikely(f2fs_cp_error(sbi)))
if (unlikely(f2fs_cp_error(sbi))) {
ret = -EIO;
goto stop;
}
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) {
gc_type = FG_GC;
......
......@@ -100,11 +100,3 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
return true;
return false;
}
static inline int is_idle(struct f2fs_sb_info *sbi)
{
struct block_device *bdev = sbi->sb->s_bdev;
struct request_queue *q = bdev_get_queue(bdev);
struct request_list *rl = &q->root_rl;
return !(rl->count[BLK_RW_SYNC]) && !(rl->count[BLK_RW_ASYNC]);
}
......@@ -16,9 +16,6 @@
bool f2fs_may_inline_data(struct inode *inode)
{
if (!test_opt(F2FS_I_SB(inode), INLINE_DATA))
return false;
if (f2fs_is_atomic_file(inode))
return false;
......@@ -177,6 +174,9 @@ int f2fs_convert_inline_inode(struct inode *inode)
struct page *ipage, *page;
int err = 0;
if (!f2fs_has_inline_data(inode))
return 0;
page = grab_cache_page(inode->i_mapping, 0);
if (!page)
return -ENOMEM;
......@@ -199,6 +199,9 @@ int f2fs_convert_inline_inode(struct inode *inode)
f2fs_unlock_op(sbi);
f2fs_put_page(page, 1);
f2fs_balance_fs(sbi, dn.node_changed);
return err;
}
......
......@@ -138,7 +138,8 @@ static int do_read_inode(struct inode *inode)
fi->i_pino = le32_to_cpu(ri->i_pino);
fi->i_dir_level = ri->i_dir_level;
f2fs_init_extent_tree(inode, &ri->i_ext);
if (f2fs_init_extent_tree(inode, &ri->i_ext))
set_page_dirty(node_page);
get_inline_info(fi, ri);
......@@ -222,7 +223,7 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
return ERR_PTR(ret);
}
void update_inode(struct inode *inode, struct page *node_page)
int update_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_inode *ri;
......@@ -260,15 +261,16 @@ void update_inode(struct inode *inode, struct page *node_page)
__set_inode_rdev(inode, ri);
set_cold_node(inode, node_page);
set_page_dirty(node_page);
clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
return set_page_dirty(node_page);
}
void update_inode_page(struct inode *inode)
int update_inode_page(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *node_page;
int ret = 0;
retry:
node_page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(node_page)) {
......@@ -279,10 +281,11 @@ void update_inode_page(struct inode *inode)
} else if (err != -ENOENT) {
f2fs_stop_checkpoint(sbi);
}
return;
return 0;
}
update_inode(inode, node_page);
ret = update_inode(inode, node_page);
f2fs_put_page(node_page, 1);
return ret;
}
int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
......@@ -300,9 +303,8 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
* We need to balance fs here to prevent from producing dirty node pages
* during the urgent cleaning time when runing out of free sections.
*/
update_inode_page(inode);
f2fs_balance_fs(sbi);
if (update_inode_page(inode))
f2fs_balance_fs(sbi, true);
return 0;
}
......@@ -328,7 +330,7 @@ void f2fs_evict_inode(struct inode *inode)
goto out_clear;
f2fs_bug_on(sbi, get_dirty_pages(inode));
remove_dirty_dir_inode(inode);
remove_dirty_inode(inode);
f2fs_destroy_extent_tree(inode);
......@@ -358,9 +360,9 @@ void f2fs_evict_inode(struct inode *inode)
if (xnid)
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
if (is_inode_flag_set(fi, FI_APPEND_WRITE))
add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
add_ino_entry(sbi, inode->i_ino, APPEND_INO);
if (is_inode_flag_set(fi, FI_UPDATE_WRITE))
add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
if (is_inode_flag_set(fi, FI_FREE_NID)) {
if (err && err != -ENOENT)
alloc_nid_done(sbi, inode->i_ino);
......
......@@ -60,7 +60,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode))
f2fs_set_encrypted_inode(inode);
if (f2fs_may_inline_data(inode))
if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
if (f2fs_may_inline_dentry(inode))
set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
......@@ -128,8 +128,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
nid_t ino = 0;
int err;
f2fs_balance_fs(sbi);
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
......@@ -142,6 +140,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
inode->i_mapping->a_ops = &f2fs_dblock_aops;
ino = inode->i_ino;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
......@@ -172,7 +172,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
!f2fs_is_child_context_consistent_with_parent(dir, inode))
return -EPERM;
f2fs_balance_fs(sbi);
f2fs_balance_fs(sbi, true);
inode->i_ctime = CURRENT_TIME;
ihold(inode);
......@@ -214,6 +214,15 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
struct page *page;
int err = 0;
if (f2fs_readonly(sbi->sb)) {
f2fs_msg(sbi->sb, KERN_INFO,
"skip recovering inline_dots inode (ino:%lu, pino:%u) "
"in readonly mountpoint", dir->i_ino, pino);
return 0;
}
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
de = f2fs_find_entry(dir, &dot, &page);
......@@ -288,12 +297,13 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
int err = -ENOENT;
trace_f2fs_unlink_enter(dir, dentry);
f2fs_balance_fs(sbi);
de = f2fs_find_entry(dir, &dentry->d_name, &page);
if (!de)
goto fail;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = acquire_orphan_inode(sbi);
if (err) {
......@@ -344,8 +354,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
if (len > dir->i_sb->s_blocksize)
return -ENAMETOOLONG;
f2fs_balance_fs(sbi);
inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
if (IS_ERR(inode))
return PTR_ERR(inode);
......@@ -357,6 +365,8 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
inode_nohighmem(inode);
inode->i_mapping->a_ops = &f2fs_dblock_aops;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
......@@ -437,8 +447,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
struct inode *inode;
int err;
f2fs_balance_fs(sbi);
inode = f2fs_new_inode(dir, S_IFDIR | mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
......@@ -448,6 +456,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
inode->i_mapping->a_ops = &f2fs_dblock_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
f2fs_balance_fs(sbi, true);
set_inode_flag(F2FS_I(inode), FI_INC_LINK);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
......@@ -485,8 +495,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode;
int err = 0;
f2fs_balance_fs(sbi);
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
......@@ -494,6 +502,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
init_special_inode(inode, inode->i_mode, rdev);
inode->i_op = &f2fs_special_inode_operations;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(dentry, inode);
if (err)
......@@ -520,9 +530,6 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
struct inode *inode;
int err;
if (!whiteout)
f2fs_balance_fs(sbi);
inode = f2fs_new_inode(dir, mode);
if (IS_ERR(inode))
return PTR_ERR(inode);
......@@ -536,6 +543,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
inode->i_mapping->a_ops = &f2fs_dblock_aops;
}
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = acquire_orphan_inode(sbi);
if (err)
......@@ -608,8 +617,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out;
}
f2fs_balance_fs(sbi);
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry)
goto out;
......@@ -639,6 +646,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!new_entry)
goto out_whiteout;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = acquire_orphan_inode(sbi);
......@@ -670,6 +679,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
update_inode_page(old_inode);
update_inode_page(new_inode);
} else {
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = f2fs_add_link(new_dentry, old_inode);
......@@ -767,8 +778,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
new_inode)))
return -EPERM;
f2fs_balance_fs(sbi);
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
if (!old_entry)
goto out;
......@@ -811,6 +820,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out_new_dir;
}
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name);
......@@ -933,7 +944,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
{
struct page *cpage = NULL;
char *caddr, *paddr = NULL;
struct f2fs_str cstr;
struct f2fs_str cstr = FSTR_INIT(NULL, 0);
struct f2fs_str pstr = FSTR_INIT(NULL, 0);
struct f2fs_encrypted_symlink_data *sd;
loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
......@@ -956,6 +967,12 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
/* Symlink is encrypted */
sd = (struct f2fs_encrypted_symlink_data *)caddr;
cstr.len = le16_to_cpu(sd->len);
/* this is broken symlink case */
if (unlikely(cstr.len == 0)) {
res = -ENOENT;
goto errout;
}
cstr.name = kmalloc(cstr.len, GFP_NOFS);
if (!cstr.name) {
res = -ENOMEM;
......@@ -964,7 +981,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
memcpy(cstr.name, sd->encrypted_path, cstr.len);
/* this is broken symlink case */
if (cstr.name[0] == 0 && cstr.len == 0) {
if (unlikely(cstr.name[0] == 0)) {
res = -ENOENT;
goto errout;
}
......@@ -1005,10 +1022,12 @@ const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
.get_link = f2fs_encrypted_get_link,
.getattr = f2fs_getattr,
.setattr = f2fs_setattr,
#ifdef CONFIG_F2FS_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.listxattr = f2fs_listxattr,
.removexattr = generic_removexattr,
#endif
};
#endif
......
......@@ -65,13 +65,14 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else if (type == EXTENT_CACHE) {
mem_size = (sbi->total_ext_tree * sizeof(struct extent_tree) +
mem_size = (atomic_read(&sbi->total_ext_tree) *
sizeof(struct extent_tree) +
atomic_read(&sbi->total_ext_node) *
sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else {
if (sbi->sb->s_bdi->wb.dirty_exceeded)
return false;
if (!sbi->sb->s_bdi->wb.dirty_exceeded)
return true;
}
return res;
}
......@@ -261,13 +262,11 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
{
struct nat_entry *e;
down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (!e) {
e = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&e->ni, ne);
}
up_write(&nm_i->nat_tree_lock);
}
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
......@@ -379,6 +378,8 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
down_write(&nm_i->nat_tree_lock);
/* Check current segment summary */
mutex_lock(&curseg->curseg_mutex);
i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
......@@ -399,6 +400,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
cache:
/* cache nat entry */
cache_nat_entry(NM_I(sbi), nid, &ne);
up_write(&nm_i->nat_tree_lock);
}
/*
......@@ -676,7 +678,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
ret = truncate_dnode(&rdn);
if (ret < 0)
goto out_err;
set_nid(page, i, 0, false);
if (set_nid(page, i, 0, false))
dn->node_changed = true;
}
} else {
child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
......@@ -689,7 +692,8 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
rdn.nid = child_nid;
ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
if (ret == (NIDS_PER_BLOCK + 1)) {
set_nid(page, i, 0, false);
if (set_nid(page, i, 0, false))
dn->node_changed = true;
child_nofs += ret;
} else if (ret < 0 && ret != -ENOENT) {
goto out_err;
......@@ -750,7 +754,8 @@ static int truncate_partial_nodes(struct dnode_of_data *dn,
err = truncate_dnode(dn);
if (err < 0)
goto fail;
set_nid(pages[idx], i, 0, false);
if (set_nid(pages[idx], i, 0, false))
dn->node_changed = true;
}
if (offset[idx + 1] == 0) {
......@@ -975,7 +980,8 @@ struct page *new_node_page(struct dnode_of_data *dn,
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(dn->inode, page);
SetPageUptodate(page);
set_page_dirty(page);
if (set_page_dirty(page))
dn->node_changed = true;
if (f2fs_has_xattr_block(ofs))
F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
......@@ -1035,6 +1041,10 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
struct page *apage;
int err;
if (!nid)
return;
f2fs_bug_on(sbi, check_nid_range(sbi, nid));
apage = find_get_page(NODE_MAPPING(sbi), nid);
if (apage && PageUptodate(apage)) {
f2fs_put_page(apage, 0);
......@@ -1050,51 +1060,38 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
f2fs_put_page(apage, err ? 1 : 0);
}
struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
/*
* readahead MAX_RA_NODE number of node pages.
*/
void ra_node_pages(struct page *parent, int start)
{
struct page *page;
int err;
repeat:
page = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!page)
return ERR_PTR(-ENOMEM);
struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
struct blk_plug plug;
int i, end;
nid_t nid;
err = read_node_page(page, READ_SYNC);
if (err < 0) {
f2fs_put_page(page, 1);
return ERR_PTR(err);
} else if (err != LOCKED_PAGE) {
lock_page(page);
}
blk_start_plug(&plug);
if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
ClearPageUptodate(page);
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
f2fs_put_page(page, 1);
goto repeat;
/* Then, try readahead for siblings of the desired node */
end = start + MAX_RA_NODE;
end = min(end, NIDS_PER_BLOCK);
for (i = start; i < end; i++) {
nid = get_nid(parent, i, false);
ra_node_page(sbi, nid);
}
return page;
blk_finish_plug(&plug);
}
/*
* Return a locked page for the desired node page.
* And, readahead MAX_RA_NODE number of node pages.
*/
struct page *get_node_page_ra(struct page *parent, int start)
struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
struct page *parent, int start)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
struct blk_plug plug;
struct page *page;
int err, i, end;
nid_t nid;
int err;
/* First, try getting the desired direct node. */
nid = get_nid(parent, start, false);
if (!nid)
return ERR_PTR(-ENOENT);
f2fs_bug_on(sbi, check_nid_range(sbi, nid));
repeat:
page = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!page)
......@@ -1108,46 +1105,53 @@ struct page *get_node_page_ra(struct page *parent, int start)
goto page_hit;
}
blk_start_plug(&plug);
/* Then, try readahead for siblings of the desired node */
end = start + MAX_RA_NODE;
end = min(end, NIDS_PER_BLOCK);
for (i = start + 1; i < end; i++) {
nid = get_nid(parent, i, false);
if (!nid)
continue;
ra_node_page(sbi, nid);
}
blk_finish_plug(&plug);
if (parent)
ra_node_pages(parent, start + 1);
lock_page(page);
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
f2fs_put_page(page, 1);
goto repeat;
}
page_hit:
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
f2fs_bug_on(sbi, nid != nid_of_node(page));
return page;
}
struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
{
return __get_node_page(sbi, nid, NULL, 0);
}
struct page *get_node_page_ra(struct page *parent, int start)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
nid_t nid = get_nid(parent, start, false);
return __get_node_page(sbi, nid, parent, start);
}
void sync_inode_page(struct dnode_of_data *dn)
{
int ret = 0;
if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
update_inode(dn->inode, dn->node_page);
ret = update_inode(dn->inode, dn->node_page);
} else if (dn->inode_page) {
if (!dn->inode_page_locked)
lock_page(dn->inode_page);
update_inode(dn->inode, dn->inode_page);
ret = update_inode(dn->inode, dn->inode_page);
if (!dn->inode_page_locked)
unlock_page(dn->inode_page);
} else {
update_inode_page(dn->inode);
ret = update_inode_page(dn->inode);
}
dn->node_changed = ret ? true: false;
}
int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
......@@ -1175,6 +1179,11 @@ int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
if (unlikely(f2fs_cp_error(sbi))) {
pagevec_release(&pvec);
return -EIO;
}
/*
* flushing sequence with step:
* 0. indirect nodes
......@@ -1349,7 +1358,7 @@ static int f2fs_write_node_page(struct page *page,
up_read(&sbi->node_write);
unlock_page(page);
if (wbc->for_reclaim)
if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
f2fs_submit_merged_bio(sbi, NODE, WRITE);
return 0;
......@@ -1440,13 +1449,10 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
if (build) {
/* do not add allocated nids */
down_read(&nm_i->nat_tree_lock);
ne = __lookup_nat_cache(nm_i, nid);
if (ne &&
(!get_nat_flag(ne, IS_CHECKPOINTED) ||
if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
nat_get_blkaddr(ne) != NULL_ADDR))
allocated = true;
up_read(&nm_i->nat_tree_lock);
if (allocated)
return 0;
}
......@@ -1532,6 +1538,8 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
META_NAT, true);
down_read(&nm_i->nat_tree_lock);
while (1) {
struct page *page = get_current_nat_page(sbi, nid);
......@@ -1560,6 +1568,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
remove_free_nid(nm_i, nid);
}
mutex_unlock(&curseg->curseg_mutex);
up_read(&nm_i->nat_tree_lock);
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
nm_i->ra_nid_pages, META_NAT, false);
......@@ -1582,8 +1591,6 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
/* We should not use stale free nids created by build_free_nids */
if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
struct node_info ni;
f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
list_for_each_entry(i, &nm_i->free_nid_list, list)
if (i->state == NID_NEW)
......@@ -1594,13 +1601,6 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
i->state = NID_ALLOC;
nm_i->fcnt--;
spin_unlock(&nm_i->free_nid_list_lock);
/* check nid is allocated already */
get_node_info(sbi, *nid, &ni);
if (ni.blk_addr != NULL_ADDR) {
alloc_nid_done(sbi, *nid);
goto retry;
}
return true;
}
spin_unlock(&nm_i->free_nid_list_lock);
......@@ -1842,14 +1842,12 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
raw_ne = nat_in_journal(sum, i);
down_write(&nm_i->nat_tree_lock);
ne = __lookup_nat_cache(nm_i, nid);
if (!ne) {
ne = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&ne->ni, &raw_ne);
}
__set_nat_cache_dirty(nm_i, ne);
up_write(&nm_i->nat_tree_lock);
}
update_nats_in_cursum(sum, -i);
mutex_unlock(&curseg->curseg_mutex);
......@@ -1883,7 +1881,6 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
struct f2fs_nat_block *nat_blk;
struct nat_entry *ne, *cur;
struct page *page = NULL;
struct f2fs_nm_info *nm_i = NM_I(sbi);
/*
* there are two steps to flush nat entries:
......@@ -1920,12 +1917,8 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
raw_ne = &nat_blk->entries[nid - start_nid];
}
raw_nat_from_node_info(raw_ne, &ne->ni);
down_write(&NM_I(sbi)->nat_tree_lock);
nat_reset_flag(ne);
__clear_nat_cache_dirty(NM_I(sbi), ne);
up_write(&NM_I(sbi)->nat_tree_lock);
if (nat_get_blkaddr(ne) == NULL_ADDR)
add_free_nid(sbi, nid, false);
}
......@@ -1937,9 +1930,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
f2fs_bug_on(sbi, set->entry_cnt);
down_write(&nm_i->nat_tree_lock);
radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
up_write(&nm_i->nat_tree_lock);
kmem_cache_free(nat_entry_set_slab, set);
}
......@@ -1959,6 +1950,9 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
if (!nm_i->dirty_nat_cnt)
return;
down_write(&nm_i->nat_tree_lock);
/*
* if there are no enough space in journal to store dirty nat
* entries, remove all entries from journal and merge them
......@@ -1967,7 +1961,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
remove_nats_in_journal(sbi);
down_write(&nm_i->nat_tree_lock);
while ((found = __gang_lookup_nat_set(nm_i,
set_idx, SETVEC_SIZE, setvec))) {
unsigned idx;
......@@ -1976,12 +1969,13 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
__adjust_nat_entry_set(setvec[idx], &sets,
MAX_NAT_JENTRIES(sum));
}
up_write(&nm_i->nat_tree_lock);
/* flush dirty nats in nat entry set */
list_for_each_entry_safe(set, tmp, &sets, set_list)
__flush_nat_entry_set(sbi, set);
up_write(&nm_i->nat_tree_lock);
f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
}
......
......@@ -183,7 +183,7 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start)
block_addr = (pgoff_t)(nm_i->nat_blkaddr +
(seg_off << sbi->log_blocks_per_seg << 1) +
(block_off & ((1 << sbi->log_blocks_per_seg) - 1)));
(block_off & (sbi->blocks_per_seg - 1)));
if (f2fs_test_bit(block_off, nm_i->nat_bitmap))
block_addr += sbi->blocks_per_seg;
......@@ -317,7 +317,7 @@ static inline bool IS_DNODE(struct page *node_page)
return true;
}
static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
static inline int set_nid(struct page *p, int off, nid_t nid, bool i)
{
struct f2fs_node *rn = F2FS_NODE(p);
......@@ -327,7 +327,7 @@ static inline void set_nid(struct page *p, int off, nid_t nid, bool i)
rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
else
rn->in.nid[off] = cpu_to_le32(nid);
set_page_dirty(p);
return set_page_dirty(p);
}
static inline nid_t get_nid(struct page *p, int off, bool i)
......
......@@ -168,6 +168,32 @@ static void recover_inode(struct inode *inode, struct page *page)
ino_of_node(page), name);
}
static bool is_same_inode(struct inode *inode, struct page *ipage)
{
struct f2fs_inode *ri = F2FS_INODE(ipage);
struct timespec disk;
if (!IS_INODE(ipage))
return true;
disk.tv_sec = le64_to_cpu(ri->i_ctime);
disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
if (timespec_compare(&inode->i_ctime, &disk) > 0)
return false;
disk.tv_sec = le64_to_cpu(ri->i_atime);
disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
if (timespec_compare(&inode->i_atime, &disk) > 0)
return false;
disk.tv_sec = le64_to_cpu(ri->i_mtime);
disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
if (timespec_compare(&inode->i_mtime, &disk) > 0)
return false;
return true;
}
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
......@@ -197,7 +223,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
goto next;
entry = get_fsync_inode(head, ino_of_node(page));
if (!entry) {
if (entry) {
if (!is_same_inode(entry->inode, page))
goto next;
} else {
if (IS_INODE(page) && is_dent_dnode(page)) {
err = recover_inode_page(sbi, page);
if (err)
......@@ -459,8 +488,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
return err;
}
static int recover_data(struct f2fs_sb_info *sbi,
struct list_head *head, int type)
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
{
unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
struct curseg_info *curseg;
......@@ -469,7 +497,7 @@ static int recover_data(struct f2fs_sb_info *sbi,
block_t blkaddr;
/* get node pages in the current segment */
curseg = CURSEG_I(sbi, type);
curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
while (1) {
......@@ -556,7 +584,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
need_writecp = true;
/* step #2: recover data */
err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
err = recover_data(sbi, &inode_list);
if (!err)
f2fs_bug_on(sbi, !list_empty(&inode_list));
out:
......@@ -595,7 +623,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
.reason = CP_RECOVERY,
};
mutex_unlock(&sbi->cp_mutex);
write_checkpoint(sbi, &cpc);
err = write_checkpoint(sbi, &cpc);
} else {
mutex_unlock(&sbi->cp_mutex);
}
......
......@@ -86,6 +86,7 @@ static inline unsigned long __reverse_ffs(unsigned long word)
/*
* __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
* f2fs_set_bit makes MSB and LSB reversed in a byte.
* @size must be integral times of unsigned long.
* Example:
* MSB <--> LSB
* f2fs_set_bit(0, bitmap) => 1000 0000
......@@ -95,94 +96,73 @@ static unsigned long __find_rev_next_bit(const unsigned long *addr,
unsigned long size, unsigned long offset)
{
const unsigned long *p = addr + BIT_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG - 1);
unsigned long result = size;
unsigned long tmp;
if (offset >= size)
return size;
size -= result;
size -= (offset & ~(BITS_PER_LONG - 1));
offset %= BITS_PER_LONG;
if (!offset)
goto aligned;
while (1) {
if (*p == 0)
goto pass;
tmp = __reverse_ulong((unsigned char *)p);
tmp &= ~0UL >> offset;
tmp &= ~0UL >> offset;
if (size < BITS_PER_LONG)
goto found_first;
if (tmp)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
p++;
aligned:
while (size & ~(BITS_PER_LONG-1)) {
tmp = __reverse_ulong((unsigned char *)p);
tmp &= (~0UL << (BITS_PER_LONG - size));
if (tmp)
goto found_middle;
result += BITS_PER_LONG;
goto found;
pass:
if (size <= BITS_PER_LONG)
break;
size -= BITS_PER_LONG;
offset = 0;
p++;
}
if (!size)
return result;
tmp = __reverse_ulong((unsigned char *)p);
found_first:
tmp &= (~0UL << (BITS_PER_LONG - size));
if (!tmp) /* Are any bits set? */
return result + size; /* Nope. */
found_middle:
return result + __reverse_ffs(tmp);
found:
return result - size + __reverse_ffs(tmp);
}
static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
unsigned long size, unsigned long offset)
{
const unsigned long *p = addr + BIT_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG - 1);
unsigned long result = size;
unsigned long tmp;
if (offset >= size)
return size;
size -= result;
size -= (offset & ~(BITS_PER_LONG - 1));
offset %= BITS_PER_LONG;
if (!offset)
goto aligned;
while (1) {
if (*p == ~0UL)
goto pass;
tmp = __reverse_ulong((unsigned char *)p);
tmp |= ~((~0UL << offset) >> offset);
if (offset)
tmp |= ~0UL << (BITS_PER_LONG - offset);
if (size < BITS_PER_LONG)
goto found_first;
if (tmp != ~0UL)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
p++;
aligned:
while (size & ~(BITS_PER_LONG - 1)) {
tmp = __reverse_ulong((unsigned char *)p);
tmp |= ~0UL >> size;
if (tmp != ~0UL)
goto found_middle;
result += BITS_PER_LONG;
goto found;
pass:
if (size <= BITS_PER_LONG)
break;
size -= BITS_PER_LONG;
offset = 0;
p++;
}
if (!size)
return result;
tmp = __reverse_ulong((unsigned char *)p);
found_first:
tmp |= ~(~0UL << (BITS_PER_LONG - size));
if (tmp == ~0UL) /* Are any bits zero? */
return result + size; /* Nope. */
found_middle:
return result + __reverse_ffz(tmp);
found:
return result - size + __reverse_ffz(tmp);
}
void register_inmem_page(struct inode *inode, struct page *page)
......@@ -233,7 +213,7 @@ int commit_inmem_pages(struct inode *inode, bool abort)
* inode becomes free by iget_locked in f2fs_iget.
*/
if (!abort) {
f2fs_balance_fs(sbi);
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
}
......@@ -257,6 +237,7 @@ int commit_inmem_pages(struct inode *inode, bool abort)
submit_bio = true;
}
} else {
ClearPageUptodate(cur->page);
trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
}
set_page_private(cur->page, 0);
......@@ -281,8 +262,10 @@ int commit_inmem_pages(struct inode *inode, bool abort)
* This function balances dirty node and dentry pages.
* In addition, it controls garbage collection.
*/
void f2fs_balance_fs(struct f2fs_sb_info *sbi)
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
if (!need)
return;
/*
* We should do GC or end up with checkpoint, if there are so many dirty
* dir/node pages without enough free segments.
......@@ -310,8 +293,12 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
if (!available_free_memory(sbi, NAT_ENTRIES) ||
excess_prefree_segs(sbi) ||
!available_free_memory(sbi, INO_ENTRIES) ||
jiffies > sbi->cp_expires)
(is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) {
if (test_opt(sbi, DATA_FLUSH))
sync_dirty_inodes(sbi, FILE_INODE);
f2fs_sync_fs(sbi->sb, true);
stat_inc_bg_cp_count(sbi->stat_info);
}
}
static int issue_flush_thread(void *data)
......@@ -1134,6 +1121,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
unsigned int start_segno, end_segno;
struct cp_control cpc;
int err = 0;
if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
return -EINVAL;
......@@ -1164,12 +1152,12 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
sbi->segs_per_sec) - 1, end_segno);
mutex_lock(&sbi->gc_mutex);
write_checkpoint(sbi, &cpc);
err = write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
}
out:
range->len = F2FS_BLK_TO_BYTES(cpc.trimmed);
return 0;
return err;
}
static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
......@@ -1749,13 +1737,13 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
if (le32_to_cpu(nid_in_journal(sum, i)) == val)
return i;
}
if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
if (alloc && __has_cursum_space(sum, 1, NAT_JOURNAL))
return update_nats_in_cursum(sum, 1);
} else if (type == SIT_JOURNAL) {
for (i = 0; i < sits_in_cursum(sum); i++)
if (le32_to_cpu(segno_in_journal(sum, i)) == val)
return i;
if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
if (alloc && __has_cursum_space(sum, 1, SIT_JOURNAL))
return update_sits_in_cursum(sum, 1);
}
return -1;
......
......@@ -32,7 +32,8 @@ static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
{
return sbi->total_ext_tree + atomic_read(&sbi->total_ext_node);
return atomic_read(&sbi->total_zombie_tree) +
atomic_read(&sbi->total_ext_node);
}
unsigned long f2fs_shrink_count(struct shrinker *shrink,
......
......@@ -67,6 +67,7 @@ enum {
Opt_extent_cache,
Opt_noextent_cache,
Opt_noinline_data,
Opt_data_flush,
Opt_err,
};
......@@ -91,6 +92,7 @@ static match_table_t f2fs_tokens = {
{Opt_extent_cache, "extent_cache"},
{Opt_noextent_cache, "noextent_cache"},
{Opt_noinline_data, "noinline_data"},
{Opt_data_flush, "data_flush"},
{Opt_err, NULL},
};
......@@ -216,7 +218,8 @@ F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, cp_interval);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
......@@ -235,6 +238,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(ram_thresh),
ATTR_LIST(ra_nid_pages),
ATTR_LIST(cp_interval),
ATTR_LIST(idle_interval),
NULL,
};
......@@ -406,6 +410,9 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_noinline_data:
clear_opt(sbi, INLINE_DATA);
break;
case Opt_data_flush:
set_opt(sbi, DATA_FLUSH);
break;
default:
f2fs_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" or missing value",
......@@ -432,6 +439,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
fi->i_current_depth = 1;
fi->i_advise = 0;
init_rwsem(&fi->i_sem);
INIT_LIST_HEAD(&fi->dirty_list);
INIT_LIST_HEAD(&fi->inmem_pages);
mutex_init(&fi->inmem_lock);
......@@ -548,7 +556,7 @@ static void f2fs_put_super(struct super_block *sb)
* normally superblock is clean, so we need to release this.
* In addition, EIO will skip do checkpoint, we need this as well.
*/
release_dirty_inode(sbi);
release_ino_entry(sbi);
release_discard_addrs(sbi);
f2fs_leave_shrinker(sbi);
......@@ -566,13 +574,14 @@ static void f2fs_put_super(struct super_block *sb)
wait_for_completion(&sbi->s_kobj_unregister);
sb->s_fs_info = NULL;
brelse(sbi->raw_super_buf);
kfree(sbi->raw_super);
kfree(sbi);
}
int f2fs_sync_fs(struct super_block *sb, int sync)
{
struct f2fs_sb_info *sbi = F2FS_SB(sb);
int err = 0;
trace_f2fs_sync_fs(sb, sync);
......@@ -582,14 +591,12 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
cpc.reason = __get_cp_reason(sbi);
mutex_lock(&sbi->gc_mutex);
write_checkpoint(sbi, &cpc);
err = write_checkpoint(sbi, &cpc);
mutex_unlock(&sbi->gc_mutex);
} else {
f2fs_balance_fs(sbi);
}
f2fs_trace_ios(NULL, 1);
return 0;
return err;
}
static int f2fs_freeze(struct super_block *sb)
......@@ -686,6 +693,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",extent_cache");
else
seq_puts(seq, ",noextent_cache");
if (test_opt(sbi, DATA_FLUSH))
seq_puts(seq, ",data_flush");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
......@@ -898,7 +907,7 @@ static const struct export_operations f2fs_export_ops = {
.get_parent = f2fs_get_parent,
};
static loff_t max_file_size(unsigned bits)
static loff_t max_file_blocks(void)
{
loff_t result = (DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS);
loff_t leaf_count = ADDRS_PER_BLOCK;
......@@ -914,10 +923,82 @@ static loff_t max_file_size(unsigned bits)
leaf_count *= NIDS_PER_BLOCK;
result += leaf_count;
result <<= bits;
return result;
}
static inline bool sanity_check_area_boundary(struct super_block *sb,
struct f2fs_super_block *raw_super)
{
u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr);
u32 nat_blkaddr = le32_to_cpu(raw_super->nat_blkaddr);
u32 ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
u32 main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
u32 segment_count_ckpt = le32_to_cpu(raw_super->segment_count_ckpt);
u32 segment_count_sit = le32_to_cpu(raw_super->segment_count_sit);
u32 segment_count_nat = le32_to_cpu(raw_super->segment_count_nat);
u32 segment_count_ssa = le32_to_cpu(raw_super->segment_count_ssa);
u32 segment_count_main = le32_to_cpu(raw_super->segment_count_main);
u32 segment_count = le32_to_cpu(raw_super->segment_count);
u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
if (segment0_blkaddr != cp_blkaddr) {
f2fs_msg(sb, KERN_INFO,
"Mismatch start address, segment0(%u) cp_blkaddr(%u)",
segment0_blkaddr, cp_blkaddr);
return true;
}
if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=
sit_blkaddr) {
f2fs_msg(sb, KERN_INFO,
"Wrong CP boundary, start(%u) end(%u) blocks(%u)",
cp_blkaddr, sit_blkaddr,
segment_count_ckpt << log_blocks_per_seg);
return true;
}
if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=
nat_blkaddr) {
f2fs_msg(sb, KERN_INFO,
"Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
sit_blkaddr, nat_blkaddr,
segment_count_sit << log_blocks_per_seg);
return true;
}
if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=
ssa_blkaddr) {
f2fs_msg(sb, KERN_INFO,
"Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
nat_blkaddr, ssa_blkaddr,
segment_count_nat << log_blocks_per_seg);
return true;
}
if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=
main_blkaddr) {
f2fs_msg(sb, KERN_INFO,
"Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
ssa_blkaddr, main_blkaddr,
segment_count_ssa << log_blocks_per_seg);
return true;
}
if (main_blkaddr + (segment_count_main << log_blocks_per_seg) !=
segment0_blkaddr + (segment_count << log_blocks_per_seg)) {
f2fs_msg(sb, KERN_INFO,
"Wrong MAIN_AREA boundary, start(%u) end(%u) blocks(%u)",
main_blkaddr,
segment0_blkaddr + (segment_count << log_blocks_per_seg),
segment_count_main << log_blocks_per_seg);
return true;
}
return false;
}
static int sanity_check_raw_super(struct super_block *sb,
struct f2fs_super_block *raw_super)
{
......@@ -947,6 +1028,14 @@ static int sanity_check_raw_super(struct super_block *sb,
return 1;
}
/* check log blocks per segment */
if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
f2fs_msg(sb, KERN_INFO,
"Invalid log blocks per segment (%u)\n",
le32_to_cpu(raw_super->log_blocks_per_seg));
return 1;
}
/* Currently, support 512/1024/2048/4096 bytes sector size */
if (le32_to_cpu(raw_super->log_sectorsize) >
F2FS_MAX_LOG_SECTOR_SIZE ||
......@@ -965,6 +1054,23 @@ static int sanity_check_raw_super(struct super_block *sb,
le32_to_cpu(raw_super->log_sectorsize));
return 1;
}
/* check reserved ino info */
if (le32_to_cpu(raw_super->node_ino) != 1 ||
le32_to_cpu(raw_super->meta_ino) != 2 ||
le32_to_cpu(raw_super->root_ino) != 3) {
f2fs_msg(sb, KERN_INFO,
"Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
le32_to_cpu(raw_super->node_ino),
le32_to_cpu(raw_super->meta_ino),
le32_to_cpu(raw_super->root_ino));
return 1;
}
/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
if (sanity_check_area_boundary(sb, raw_super))
return 1;
return 0;
}
......@@ -1018,7 +1124,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
atomic_set(&sbi->nr_pages[i], 0);
sbi->dir_level = DEF_DIR_LEVEL;
sbi->cp_interval = DEF_CP_INTERVAL;
sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
clear_sbi_flag(sbi, SBI_NEED_FSCK);
INIT_LIST_HEAD(&sbi->s_list);
......@@ -1032,111 +1139,114 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
*/
static int read_raw_super_block(struct super_block *sb,
struct f2fs_super_block **raw_super,
struct buffer_head **raw_super_buf,
int *recovery)
int *valid_super_block, int *recovery)
{
int block = 0;
struct buffer_head *buffer;
struct f2fs_super_block *super;
struct buffer_head *bh;
struct f2fs_super_block *super, *buf;
int err = 0;
super = kzalloc(sizeof(struct f2fs_super_block), GFP_KERNEL);
if (!super)
return -ENOMEM;
retry:
buffer = sb_bread(sb, block);
if (!buffer) {
bh = sb_bread(sb, block);
if (!bh) {
*recovery = 1;
f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
block + 1);
if (block == 0) {
block++;
goto retry;
} else {
err = -EIO;
goto out;
}
goto next;
}
super = (struct f2fs_super_block *)
((char *)(buffer)->b_data + F2FS_SUPER_OFFSET);
buf = (struct f2fs_super_block *)(bh->b_data + F2FS_SUPER_OFFSET);
/* sanity checking of raw super */
if (sanity_check_raw_super(sb, super)) {
brelse(buffer);
if (sanity_check_raw_super(sb, buf)) {
brelse(bh);
*recovery = 1;
f2fs_msg(sb, KERN_ERR,
"Can't find valid F2FS filesystem in %dth superblock",
block + 1);
if (block == 0) {
block++;
goto retry;
} else {
err = -EINVAL;
goto out;
}
goto next;
}
if (!*raw_super) {
*raw_super_buf = buffer;
memcpy(super, buf, sizeof(*super));
*valid_super_block = block;
*raw_super = super;
} else {
/* already have a valid superblock */
brelse(buffer);
}
brelse(bh);
next:
/* check the validity of the second superblock */
if (block == 0) {
block++;
goto retry;
}
out:
/* No valid superblock */
if (!*raw_super)
if (!*raw_super) {
kfree(super);
return err;
}
return 0;
}
static int __f2fs_commit_super(struct f2fs_sb_info *sbi, int block)
{
struct f2fs_super_block *super = F2FS_RAW_SUPER(sbi);
struct buffer_head *bh;
int err;
bh = sb_getblk(sbi->sb, block);
if (!bh)
return -EIO;
lock_buffer(bh);
memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
set_buffer_uptodate(bh);
set_buffer_dirty(bh);
unlock_buffer(bh);
/* it's rare case, we can do fua all the time */
err = __sync_dirty_buffer(bh, WRITE_FLUSH_FUA);
brelse(bh);
return err;
}
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
{
struct buffer_head *sbh = sbi->raw_super_buf;
sector_t block = sbh->b_blocknr;
int err;
/* write back-up superblock first */
sbh->b_blocknr = block ? 0 : 1;
mark_buffer_dirty(sbh);
err = sync_dirty_buffer(sbh);
sbh->b_blocknr = block;
err = __f2fs_commit_super(sbi, sbi->valid_super_block ? 0 : 1);
/* if we are in recovery path, skip writing valid superblock */
if (recover || err)
goto out;
return err;
/* write current valid superblock */
mark_buffer_dirty(sbh);
err = sync_dirty_buffer(sbh);
out:
clear_buffer_write_io_error(sbh);
set_buffer_uptodate(sbh);
return err;
return __f2fs_commit_super(sbi, sbi->valid_super_block);
}
static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
{
struct f2fs_sb_info *sbi;
struct f2fs_super_block *raw_super;
struct buffer_head *raw_super_buf;
struct inode *root;
long err;
bool retry = true, need_fsck = false;
char *options = NULL;
int recovery, i;
int recovery, i, valid_super_block;
try_onemore:
err = -EINVAL;
raw_super = NULL;
raw_super_buf = NULL;
valid_super_block = -1;
recovery = 0;
/* allocate memory for f2fs-specific super block info */
......@@ -1150,7 +1260,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_sbi;
}
err = read_raw_super_block(sb, &raw_super, &raw_super_buf, &recovery);
err = read_raw_super_block(sb, &raw_super, &valid_super_block,
&recovery);
if (err)
goto free_sbi;
......@@ -1167,7 +1278,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
if (err)
goto free_options;
sb->s_maxbytes = max_file_size(le32_to_cpu(raw_super->log_blocksize));
sbi->max_file_blocks = max_file_blocks();
sb->s_maxbytes = sbi->max_file_blocks <<
le32_to_cpu(raw_super->log_blocksize);
sb->s_max_links = F2FS_LINK_MAX;
get_random_bytes(&sbi->s_next_generation, sizeof(u32));
......@@ -1183,7 +1296,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
/* init f2fs-specific super block info */
sbi->sb = sb;
sbi->raw_super = raw_super;
sbi->raw_super_buf = raw_super_buf;
sbi->valid_super_block = valid_super_block;
mutex_init(&sbi->gc_mutex);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
......@@ -1236,8 +1349,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
le64_to_cpu(sbi->ckpt->valid_block_count);
sbi->last_valid_block_count = sbi->total_valid_block_count;
sbi->alloc_valid_block_count = 0;
INIT_LIST_HEAD(&sbi->dir_inode_list);
spin_lock_init(&sbi->dir_inode_lock);
for (i = 0; i < NR_INODE_TYPE; i++) {
INIT_LIST_HEAD(&sbi->inode_list[i]);
spin_lock_init(&sbi->inode_lock[i]);
}
init_extent_cache_info(sbi);
......@@ -1355,12 +1470,14 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
f2fs_commit_super(sbi, true);
}
sbi->cp_expires = round_jiffies_up(jiffies);
f2fs_update_time(sbi, CP_TIME);
f2fs_update_time(sbi, REQ_TIME);
return 0;
free_kobj:
kobject_del(&sbi->s_kobj);
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
free_proc:
if (sbi->s_proc) {
remove_proc_entry("segment_info", sbi->s_proc);
......@@ -1387,7 +1504,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
free_options:
kfree(options);
free_sb_buf:
brelse(raw_super_buf);
kfree(raw_super);
free_sbi:
kfree(sbi);
......@@ -1478,10 +1595,14 @@ static int __init init_f2fs_fs(void)
err = register_filesystem(&f2fs_fs_type);
if (err)
goto free_shrinker;
f2fs_create_root_stats();
err = f2fs_create_root_stats();
if (err)
goto free_filesystem;
f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
return 0;
free_filesystem:
unregister_filesystem(&f2fs_fs_type);
free_shrinker:
unregister_shrinker(&f2fs_shrinker_info);
free_crypto:
......
......@@ -571,7 +571,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
if (ipage)
return __f2fs_setxattr(inode, index, name, value,
size, ipage, flags);
f2fs_balance_fs(sbi);
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
/* protect xattr_ver */
......@@ -580,5 +580,6 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
up_write(&F2FS_I(inode)->i_sem);
f2fs_unlock_op(sbi);
f2fs_update_time(sbi, REQ_TIME);
return err;
}
......@@ -51,6 +51,7 @@
#define MAX_ACTIVE_DATA_LOGS 8
#define VERSION_LEN 256
#define MAX_VOLUME_NAME 512
/*
* For superblock
......@@ -84,7 +85,7 @@ struct f2fs_super_block {
__le32 node_ino; /* node inode number */
__le32 meta_ino; /* meta inode number */
__u8 uuid[16]; /* 128-bit uuid for volume */
__le16 volume_name[512]; /* volume name */
__le16 volume_name[MAX_VOLUME_NAME]; /* volume name */
__le32 extension_count; /* # of extensions below */
__u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */
__le32 cp_payload;
......
......@@ -1265,6 +1265,44 @@ TRACE_EVENT(f2fs_destroy_extent_tree,
__entry->node_cnt)
);
DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes,
TP_PROTO(struct super_block *sb, int type, int count),
TP_ARGS(sb, type, count),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(int, type)
__field(int, count)
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->type = type;
__entry->count = count;
),
TP_printk("dev = (%d,%d), %s, dirty count = %d",
show_dev(__entry),
show_file_type(__entry->type),
__entry->count)
);
DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_enter,
TP_PROTO(struct super_block *sb, int type, int count),
TP_ARGS(sb, type, count)
);
DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_exit,
TP_PROTO(struct super_block *sb, int type, int count),
TP_ARGS(sb, type, count)
);
#endif /* _TRACE_F2FS_H */
/* This part must be outside protection */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment