Commit 6d8ef53e authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'f2fs-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "In this round, we've mostly tuned f2fs to provide better user
  experience for Android. Especially, we've worked on atomic write
  feature again with SQLite community in order to support it officially.
  And we added or modified several facilities to analyze and enhance IO
  behaviors.

  Major changes include:
   - add app/fs io stat
   - add inode checksum feature
   - support project/journalled quota
   - enhance atomic write with new ioctl() which exposes feature set
   - enhance background gc/discard/fstrim flows with new gc_urgent mode
   - add F2FS_IOC_FS{GET,SET}XATTR
   - fix some quota flows"

* tag 'f2fs-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (63 commits)
  f2fs: hurry up to issue discard after io interruption
  f2fs: fix to show correct discard_granularity in sysfs
  f2fs: detect dirty inode in evict_inode
  f2fs: clear radix tree dirty tag of pages whose dirty flag is cleared
  f2fs: speed up gc_urgent mode with SSR
  f2fs: better to wait for fstrim completion
  f2fs: avoid race in between read xattr & write xattr
  f2fs: make get_lock_data_page to handle encrypted inode
  f2fs: use generic terms used for encrypted block management
  f2fs: introduce f2fs_encrypted_file for clean-up
  Revert "f2fs: add a new function get_ssr_cost"
  f2fs: constify super_operations
  f2fs: fix to wake up all sleeping flusher
  f2fs: avoid race in between atomic_read & atomic_inc
  f2fs: remove unneeded parameter of change_curseg
  f2fs: update i_flags correctly
  f2fs: don't check inode's checksum if it was dirtied or writebacked
  f2fs: don't need to update inode checksum for recovery
  f2fs: trigger fdatasync for non-atomic_write file
  f2fs: fix to avoid race in between aio and gc
  ...
parents cdb897e3 e6c6de18
......@@ -57,6 +57,15 @@ Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
Description:
Controls the issue rate of small discard commands.
What: /sys/fs/f2fs/<disk>/discard_granularity
Date: July 2017
Contact: "Chao Yu" <yuchao0@huawei.com>
Description:
Controls the discard granularity of the inner discard thread; the inner
thread will not issue discards whose size is smaller than the granularity.
The unit size is one block; currently only values in the range [1, 512]
are supported.
What: /sys/fs/f2fs/<disk>/max_victim_search
Date: January 2014
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
......@@ -130,3 +139,15 @@ Date: June 2017
Contact: "Chao Yu" <yuchao0@huawei.com>
Description:
Controls current reserved blocks in system.
What: /sys/fs/f2fs/<disk>/gc_urgent
Date: August 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Do background GC aggressively
What: /sys/fs/f2fs/<disk>/gc_urgent_sleep_time
Date: August 2017
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Controls sleep time of GC urgent mode
......@@ -164,6 +164,16 @@ io_bits=%u Set the bit size of write IO requests. It should be set
with "mode=lfs".
usrquota Enable plain user disk quota accounting.
grpquota Enable plain group disk quota accounting.
prjquota Enable plain project quota accounting.
usrjquota=<file> Appoint specified file and type during mount, so that quota
grpjquota=<file> information can be properly updated during recovery flow,
prjjquota=<file> <quota file>: must be in root directory;
jqfmt=<quota type> <quota type>: [vfsold,vfsv0,vfsv1].
offusrjquota Turn off user journalled quota.
offgrpjquota Turn off group journalled quota.
offprjjquota Turn off project journalled quota.
quota Enable plain user disk quota accounting.
noquota Disable all plain disk quota option.
================================================================================
DEBUGFS ENTRIES
......@@ -209,6 +219,15 @@ Files in /sys/fs/f2fs/<devname>
gc_idle = 1 will select the Cost Benefit approach
& setting gc_idle = 2 will select the greedy approach.
gc_urgent This parameter controls whether background GC is
triggered urgently. Setting gc_urgent = 0 [default]
restores the default behavior, while setting it
to 1 makes the background thread run GC at the given
gc_urgent_sleep_time interval.
gc_urgent_sleep_time This parameter controls sleep time for gc_urgent.
500 ms is set by default. See above gc_urgent.
reclaim_segments This parameter controls the number of prefree
segments to be reclaimed. If the number of prefree
segments is larger than the number of segments
......
......@@ -207,15 +207,16 @@ static int __f2fs_set_acl(struct inode *inode, int type,
void *value = NULL;
size_t size = 0;
int error;
umode_t mode = inode->i_mode;
switch (type) {
case ACL_TYPE_ACCESS:
name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl && !ipage) {
error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
error = posix_acl_update_mode(inode, &mode, &acl);
if (error)
return error;
set_acl_inode(inode, inode->i_mode);
set_acl_inode(inode, mode);
}
break;
......
......@@ -230,8 +230,9 @@ void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
}
static int f2fs_write_meta_page(struct page *page,
struct writeback_control *wbc)
static int __f2fs_write_meta_page(struct page *page,
struct writeback_control *wbc,
enum iostat_type io_type)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
......@@ -244,7 +245,7 @@ static int f2fs_write_meta_page(struct page *page,
if (unlikely(f2fs_cp_error(sbi)))
goto redirty_out;
write_meta_page(sbi, page);
write_meta_page(sbi, page, io_type);
dec_page_count(sbi, F2FS_DIRTY_META);
if (wbc->for_reclaim)
......@@ -263,6 +264,12 @@ static int f2fs_write_meta_page(struct page *page,
return AOP_WRITEPAGE_ACTIVATE;
}
/*
 * Two-argument (page, wbc) entry point for writing a meta page.
 *
 * Forwards @page and @wbc unchanged to __f2fs_write_meta_page() with the
 * I/O type fixed to FS_META_IO, and returns that function's result.
 */
static int f2fs_write_meta_page(struct page *page,
struct writeback_control *wbc)
{
return __f2fs_write_meta_page(page, wbc, FS_META_IO);
}
static int f2fs_write_meta_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
......@@ -283,7 +290,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
trace_f2fs_writepages(mapping->host, wbc, META);
diff = nr_pages_to_write(sbi, META, wbc);
written = sync_meta_pages(sbi, META, wbc->nr_to_write);
written = sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
mutex_unlock(&sbi->cp_mutex);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
return 0;
......@@ -295,7 +302,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
}
long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
long nr_to_write)
long nr_to_write, enum iostat_type io_type)
{
struct address_space *mapping = META_MAPPING(sbi);
pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX;
......@@ -346,7 +353,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
if (mapping->a_ops->writepage(page, &wbc)) {
if (__f2fs_write_meta_page(page, &wbc, io_type)) {
unlock_page(page);
break;
}
......@@ -581,11 +588,24 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
int recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
block_t start_blk, orphan_blocks, i, j;
int err;
unsigned int s_flags = sbi->sb->s_flags;
int err = 0;
if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
return 0;
if (s_flags & MS_RDONLY) {
f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
sbi->sb->s_flags &= ~MS_RDONLY;
}
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= MS_ACTIVE;
/* Turn on quotas so that they are updated correctly */
f2fs_enable_quota_files(sbi);
#endif
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
......@@ -601,14 +621,21 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
err = recover_orphan_inode(sbi, ino);
if (err) {
f2fs_put_page(page, 1);
return err;
goto out;
}
}
f2fs_put_page(page, 1);
}
/* clear Orphan Flag */
clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
return 0;
out:
#ifdef CONFIG_QUOTA
/* Turn quotas off */
f2fs_quota_off_umount(sbi->sb);
#endif
sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
return err;
}
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
......@@ -904,7 +931,14 @@ int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
if (inode) {
unsigned long cur_ino = inode->i_ino;
if (is_dir)
F2FS_I(inode)->cp_task = current;
filemap_fdatawrite(inode->i_mapping);
if (is_dir)
F2FS_I(inode)->cp_task = NULL;
iput(inode);
/* We need to give cpu to another writers. */
if (ino == cur_ino) {
......@@ -1017,7 +1051,7 @@ static int block_operations(struct f2fs_sb_info *sbi)
if (get_pages(sbi, F2FS_DIRTY_NODES)) {
up_write(&sbi->node_write);
err = sync_node_pages(sbi, &wbc);
err = sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
if (err) {
up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
......@@ -1115,7 +1149,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
sync_meta_pages(sbi, META, LONG_MAX);
sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
}
......@@ -1194,7 +1228,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* Flush all the NAT BITS pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
sync_meta_pages(sbi, META, LONG_MAX);
sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
}
......@@ -1249,7 +1283,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
percpu_counter_set(&sbi->alloc_valid_block_count, 0);
/* Here, we only have one bio having CP pack */
sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
sync_meta_pages(sbi, META_FLUSH, LONG_MAX, FS_CP_META_IO);
/* wait for previous submitted meta pages writeback */
wait_on_all_pages_writeback(sbi);
......
This diff is collapsed.
......@@ -705,6 +705,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
struct f2fs_dentry_block *dentry_blk;
unsigned int bit_pos;
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
struct address_space *mapping = page_mapping(page);
unsigned long flags;
int i;
f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
......@@ -735,6 +737,11 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
if (bit_pos == NR_DENTRY_IN_BLOCK &&
!truncate_hole(dir, page->index, page->index + 1)) {
spin_lock_irqsave(&mapping->tree_lock, flags);
radix_tree_tag_clear(&mapping->page_tree, page_index(page),
PAGECACHE_TAG_DIRTY);
spin_unlock_irqrestore(&mapping->tree_lock, flags);
clear_page_dirty_for_io(page);
ClearPagePrivate(page);
ClearPageUptodate(page);
......
This diff is collapsed.
This diff is collapsed.
......@@ -28,16 +28,21 @@ static int gc_thread_func(void *data)
struct f2fs_sb_info *sbi = data;
struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
long wait_ms;
unsigned int wait_ms;
wait_ms = gc_th->min_sleep_time;
set_freezable();
do {
wait_event_interruptible_timeout(*wq,
kthread_should_stop() || freezing(current),
kthread_should_stop() || freezing(current) ||
gc_th->gc_wake,
msecs_to_jiffies(wait_ms));
/* give it a try one time */
if (gc_th->gc_wake)
gc_th->gc_wake = 0;
if (try_to_freeze())
continue;
if (kthread_should_stop())
......@@ -55,6 +60,9 @@ static int gc_thread_func(void *data)
}
#endif
if (!sb_start_write_trylock(sbi->sb))
continue;
/*
* [GC triggering condition]
* 0. GC is not conducted currently.
......@@ -69,19 +77,24 @@ static int gc_thread_func(void *data)
* So, I'd like to wait some time to collect dirty segments.
*/
if (!mutex_trylock(&sbi->gc_mutex))
continue;
goto next;
if (gc_th->gc_urgent) {
wait_ms = gc_th->urgent_sleep_time;
goto do_gc;
}
if (!is_idle(sbi)) {
increase_sleep_time(gc_th, &wait_ms);
mutex_unlock(&sbi->gc_mutex);
continue;
goto next;
}
if (has_enough_invalid_blocks(sbi))
decrease_sleep_time(gc_th, &wait_ms);
else
increase_sleep_time(gc_th, &wait_ms);
do_gc:
stat_inc_bggc_count(sbi);
/* if return value is not zero, no victim was selected */
......@@ -93,6 +106,8 @@ static int gc_thread_func(void *data)
/* balancing f2fs's metadata periodically */
f2fs_balance_fs_bg(sbi);
next:
sb_end_write(sbi->sb);
} while (!kthread_should_stop());
return 0;
......@@ -110,11 +125,14 @@ int start_gc_thread(struct f2fs_sb_info *sbi)
goto out;
}
gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
gc_th->gc_idle = 0;
gc_th->gc_urgent = 0;
gc_th->gc_wake= 0;
sbi->gc_thread = gc_th;
init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
......@@ -259,20 +277,11 @@ static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi,
valid_blocks * 2 : valid_blocks;
}
/*
 * Cost of segment @segno for SSR victim selection: the larger of the
 * segment's checkpointed valid block count and its current valid block
 * count, as recorded in its seg_entry.
 */
static unsigned int get_ssr_cost(struct f2fs_sb_info *sbi,
						unsigned int segno)
{
	struct seg_entry *entry = get_seg_entry(sbi, segno);

	if (entry->ckpt_valid_blocks > entry->valid_blocks)
		return entry->ckpt_valid_blocks;

	return entry->valid_blocks;
}
static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
unsigned int segno, struct victim_sel_policy *p)
{
if (p->alloc_mode == SSR)
return get_ssr_cost(sbi, segno);
return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
/* alloc_mode == LFS */
if (p->gc_mode == GC_GREEDY)
......@@ -582,7 +591,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
}
*nofs = ofs_of_node(node_page);
source_blkaddr = datablock_addr(node_page, ofs_in_node);
source_blkaddr = datablock_addr(NULL, node_page, ofs_in_node);
f2fs_put_page(node_page, 1);
if (source_blkaddr != blkaddr)
......@@ -590,8 +599,12 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
return true;
}
static void move_encrypted_block(struct inode *inode, block_t bidx,
unsigned int segno, int off)
/*
* Move data block via META_MAPPING while keeping locked data page.
* This can be used to move blocks, aka LBAs, directly on disk.
*/
static void move_data_block(struct inode *inode, block_t bidx,
unsigned int segno, int off)
{
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(inode),
......@@ -684,6 +697,8 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
fio.new_blkaddr = newaddr;
f2fs_submit_page_write(&fio);
f2fs_update_iostat(fio.sbi, FS_GC_DATA_IO, F2FS_BLKSIZE);
f2fs_update_data_blkaddr(&dn, newaddr);
set_inode_flag(inode, FI_APPEND_WRITE);
if (page->index == 0)
......@@ -731,6 +746,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
.page = page,
.encrypted_page = NULL,
.need_lock = LOCK_REQ,
.io_type = FS_GC_DATA_IO,
};
bool is_dirty = PageDirty(page);
int err;
......@@ -819,8 +835,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
continue;
/* if encrypted inode, let's go phase 3 */
if (f2fs_encrypted_inode(inode) &&
S_ISREG(inode->i_mode)) {
if (f2fs_encrypted_file(inode)) {
add_gc_inode(gc_list, inode);
continue;
}
......@@ -854,14 +869,18 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
continue;
}
locked = true;
/* wait for all inflight aio data */
inode_dio_wait(inode);
}
start_bidx = start_bidx_of_node(nofs, inode)
+ ofs_in_node;
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
move_encrypted_block(inode, start_bidx, segno, off);
if (f2fs_encrypted_file(inode))
move_data_block(inode, start_bidx, segno, off);
else
move_data_page(inode, start_bidx, gc_type, segno, off);
move_data_page(inode, start_bidx, gc_type,
segno, off);
if (locked) {
up_write(&fi->dio_rwsem[WRITE]);
......@@ -898,7 +917,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
struct blk_plug plug;
unsigned int segno = start_segno;
unsigned int end_segno = start_segno + sbi->segs_per_sec;
int sec_freed = 0;
int seg_freed = 0;
unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
SUM_TYPE_DATA : SUM_TYPE_NODE;
......@@ -944,6 +963,10 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
gc_type);
stat_inc_seg_count(sbi, type, gc_type);
if (gc_type == FG_GC &&
get_valid_blocks(sbi, segno, false) == 0)
seg_freed++;
next:
f2fs_put_page(sum_page, 0);
}
......@@ -954,21 +977,17 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
blk_finish_plug(&plug);
if (gc_type == FG_GC &&
get_valid_blocks(sbi, start_segno, true) == 0)
sec_freed = 1;
stat_inc_call_count(sbi->stat_info);
return sec_freed;
return seg_freed;
}
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
bool background, unsigned int segno)
{
int gc_type = sync ? FG_GC : BG_GC;
int sec_freed = 0;
int ret;
int sec_freed = 0, seg_freed = 0, total_freed = 0;
int ret = 0;
struct cp_control cpc;
unsigned int init_segno = segno;
struct gc_inode_list gc_list = {
......@@ -976,6 +995,15 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
.iroot = RADIX_TREE_INIT(GFP_NOFS),
};
trace_f2fs_gc_begin(sbi->sb, sync, background,
get_pages(sbi, F2FS_DIRTY_NODES),
get_pages(sbi, F2FS_DIRTY_DENTS),
get_pages(sbi, F2FS_DIRTY_IMETA),
free_sections(sbi),
free_segments(sbi),
reserved_segments(sbi),
prefree_segments(sbi));
cpc.reason = __get_cp_reason(sbi);
gc_more:
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) {
......@@ -1002,17 +1030,20 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
gc_type = FG_GC;
}
ret = -EINVAL;
/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
if (gc_type == BG_GC && !background)
if (gc_type == BG_GC && !background) {
ret = -EINVAL;
goto stop;
if (!__get_victim(sbi, &segno, gc_type))
}
if (!__get_victim(sbi, &segno, gc_type)) {
ret = -ENODATA;
goto stop;
ret = 0;
}
if (do_garbage_collect(sbi, segno, &gc_list, gc_type) &&
gc_type == FG_GC)
seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
sec_freed++;
total_freed += seg_freed;
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;
......@@ -1029,6 +1060,16 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
stop:
SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0;
SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno;
trace_f2fs_gc_end(sbi->sb, ret, total_freed, sec_freed,
get_pages(sbi, F2FS_DIRTY_NODES),
get_pages(sbi, F2FS_DIRTY_DENTS),
get_pages(sbi, F2FS_DIRTY_IMETA),
free_sections(sbi),
free_segments(sbi),
reserved_segments(sbi),
prefree_segments(sbi));
mutex_unlock(&sbi->gc_mutex);
put_gc_inode(&gc_list);
......
......@@ -13,6 +13,7 @@
* whether IO subsystem is idle
* or not
*/
#define DEF_GC_THREAD_URGENT_SLEEP_TIME 500 /* 500 ms */
#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */
#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000
#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */
......@@ -27,12 +28,15 @@ struct f2fs_gc_kthread {
wait_queue_head_t gc_wait_queue_head;
/* for gc sleep time */
unsigned int urgent_sleep_time;
unsigned int min_sleep_time;
unsigned int max_sleep_time;
unsigned int no_gc_sleep_time;
/* for changing gc mode */
unsigned int gc_idle;
unsigned int gc_urgent;
unsigned int gc_wake;
};
struct gc_inode_list {
......@@ -65,25 +69,32 @@ static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
}
static inline void increase_sleep_time(struct f2fs_gc_kthread *gc_th,
long *wait)
unsigned int *wait)
{
unsigned int min_time = gc_th->min_sleep_time;
unsigned int max_time = gc_th->max_sleep_time;
if (*wait == gc_th->no_gc_sleep_time)
return;
*wait += gc_th->min_sleep_time;
if (*wait > gc_th->max_sleep_time)
*wait = gc_th->max_sleep_time;
if ((long long)*wait + (long long)min_time > (long long)max_time)
*wait = max_time;
else
*wait += min_time;
}
static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th,
long *wait)
unsigned int *wait)
{
unsigned int min_time = gc_th->min_sleep_time;
if (*wait == gc_th->no_gc_sleep_time)
*wait = gc_th->max_sleep_time;
*wait -= gc_th->min_sleep_time;
if (*wait <= gc_th->min_sleep_time)
*wait = gc_th->min_sleep_time;
if ((long long)*wait - (long long)min_time < (long long)min_time)
*wait = min_time;
else
*wait -= min_time;
}
static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
......
This diff is collapsed.
......@@ -49,20 +49,22 @@ void f2fs_set_inode_flags(struct inode *inode)
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
{
int extra_size = get_extra_isize(inode);
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
if (ri->i_addr[0])
inode->i_rdev =
old_decode_dev(le32_to_cpu(ri->i_addr[0]));
if (ri->i_addr[extra_size])
inode->i_rdev = old_decode_dev(
le32_to_cpu(ri->i_addr[extra_size]));
else
inode->i_rdev =
new_decode_dev(le32_to_cpu(ri->i_addr[1]));
inode->i_rdev = new_decode_dev(
le32_to_cpu(ri->i_addr[extra_size + 1]));
}
}
static bool __written_first_block(struct f2fs_inode *ri)
{
block_t addr = le32_to_cpu(ri->i_addr[0]);
block_t addr = le32_to_cpu(ri->i_addr[offset_in_addr(ri)]);
if (addr != NEW_ADDR && addr != NULL_ADDR)
return true;
......@@ -71,25 +73,27 @@ static bool __written_first_block(struct f2fs_inode *ri)
static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
{
int extra_size = get_extra_isize(inode);
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
if (old_valid_dev(inode->i_rdev)) {
ri->i_addr[0] =
ri->i_addr[extra_size] =
cpu_to_le32(old_encode_dev(inode->i_rdev));
ri->i_addr[1] = 0;
ri->i_addr[extra_size + 1] = 0;
} else {
ri->i_addr[0] = 0;
ri->i_addr[1] =
ri->i_addr[extra_size] = 0;
ri->i_addr[extra_size + 1] =
cpu_to_le32(new_encode_dev(inode->i_rdev));
ri->i_addr[2] = 0;
ri->i_addr[extra_size + 2] = 0;
}
}
}
static void __recover_inline_status(struct inode *inode, struct page *ipage)
{
void *inline_data = inline_data_addr(ipage);
void *inline_data = inline_data_addr(inode, ipage);
__le32 *start = inline_data;
__le32 *end = start + MAX_INLINE_DATA / sizeof(__le32);
__le32 *end = start + MAX_INLINE_DATA(inode) / sizeof(__le32);
while (start < end) {
if (*start++) {
......@@ -104,12 +108,84 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage)
return;
}
/*
 * Decide whether the inode checksum applies to the node block in @page.
 *
 * Returns true only when all of the following hold:
 *  - the superblock advertises the inode checksum feature,
 *  - the node block is an inode and has F2FS_EXTRA_ATTR set in i_inline,
 *  - the recorded extra attribute area is large enough to contain
 *    i_inode_checksum.
 * Otherwise returns false.
 */
static bool f2fs_enable_inode_chksum(struct f2fs_sb_info *sbi, struct page *page)
{
	struct f2fs_inode *ri = &F2FS_NODE(page)->i;
	/*
	 * i_extra_isize is handled as a 16-bit little-endian value elsewhere
	 * in this file (le16_to_cpu() on read, cpu_to_le16() on update), so
	 * it must be decoded with the 16-bit helper here too; a 32-bit swap
	 * yields a bogus size on big-endian hosts.
	 */
	int extra_isize = le16_to_cpu(ri->i_extra_isize);

	if (!f2fs_sb_has_inode_chksum(sbi->sb))
		return false;

	if (!RAW_IS_INODE(F2FS_NODE(page)) || !(ri->i_inline & F2FS_EXTRA_ATTR))
		return false;

	if (!F2FS_FITS_IN_INODE(ri, extra_isize, i_inode_checksum))
		return false;

	return true;
}
{
struct f2fs_node *node = F2FS_NODE(page);
struct f2fs_inode *ri = &node->i;
__le32 ino = node->footer.ino;
__le32 gen = ri->i_generation;
__u32 chksum, chksum_seed;
__u32 dummy_cs = 0;
unsigned int offset = offsetof(struct f2fs_inode, i_inode_checksum);
unsigned int cs_size = sizeof(dummy_cs);
chksum = f2fs_chksum(sbi, sbi->s_chksum_seed, (__u8 *)&ino,
sizeof(ino));
chksum_seed = f2fs_chksum(sbi, chksum, (__u8 *)&gen, sizeof(gen));
chksum = f2fs_chksum(sbi, chksum_seed, (__u8 *)ri, offset);
chksum = f2fs_chksum(sbi, chksum, (__u8 *)&dummy_cs, cs_size);
offset += cs_size;
chksum = f2fs_chksum(sbi, chksum, (__u8 *)ri + offset,
F2FS_BLKSIZE - offset);
return chksum;
}
/*
 * Check the stored inode checksum of @page against a freshly computed one.
 *
 * Returns true when the checksum does not apply to this page (see
 * f2fs_enable_inode_chksum()), when the page is dirty or under writeback,
 * or when the stored and computed values match. On a mismatch a warning is
 * logged and false is returned.
 */
bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page)
{
	__u32 stored, computed;

	if (!f2fs_enable_inode_chksum(sbi, page))
		return true;

	/* Dirty or in-flight pages are not verified. */
	if (PageDirty(page) || PageWriteback(page))
		return true;

	stored = le32_to_cpu(F2FS_NODE(page)->i.i_inode_checksum);
	computed = f2fs_inode_chksum(sbi, page);

	if (stored == computed)
		return true;

	f2fs_msg(sbi->sb, KERN_WARNING,
		"checksum invalid, ino = %x, %x vs. %x",
		ino_of_node(page), stored, computed);
	return false;
}
/*
 * Store a freshly computed checksum into the inode held in @page.
 *
 * Does nothing when f2fs_enable_inode_chksum() reports that the checksum
 * does not apply to this page. The value is written in little-endian form.
 */
void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page)
{
	if (f2fs_enable_inode_chksum(sbi, page)) {
		struct f2fs_inode *ri = &F2FS_NODE(page)->i;

		ri->i_inode_checksum =
			cpu_to_le32(f2fs_inode_chksum(sbi, page));
	}
}
static int do_read_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct page *node_page;
struct f2fs_inode *ri;
projid_t i_projid;
/* Check if ino is within scope */
if (check_nid_range(sbi, inode->i_ino)) {
......@@ -153,6 +229,9 @@ static int do_read_inode(struct inode *inode)
get_inline_info(inode, ri);
fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
le16_to_cpu(ri->i_extra_isize) : 0;
/* check data exist */
if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode))
__recover_inline_status(inode, node_page);
......@@ -166,6 +245,16 @@ static int do_read_inode(struct inode *inode)
if (!need_inode_block_update(sbi, inode->i_ino))
fi->last_disk_size = inode->i_size;
if (fi->i_flags & FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
if (f2fs_has_extra_attr(inode) && f2fs_sb_has_project_quota(sbi->sb) &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid))
i_projid = (projid_t)le32_to_cpu(ri->i_projid);
else
i_projid = F2FS_DEF_PROJID;
fi->i_projid = make_kprojid(&init_user_ns, i_projid);
f2fs_put_page(node_page, 1);
stat_inc_inline_xattr(inode);
......@@ -292,6 +381,20 @@ int update_inode(struct inode *inode, struct page *node_page)
ri->i_generation = cpu_to_le32(inode->i_generation);
ri->i_dir_level = F2FS_I(inode)->i_dir_level;
if (f2fs_has_extra_attr(inode)) {
ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize);
if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) &&
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
i_projid)) {
projid_t i_projid;
i_projid = from_kprojid(&init_user_ns,
F2FS_I(inode)->i_projid);
ri->i_projid = cpu_to_le32(i_projid);
}
}
__set_inode_rdev(inode, ri);
set_cold_node(inode, node_page);
......@@ -416,6 +519,9 @@ void f2fs_evict_inode(struct inode *inode)
stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode);
if (!is_set_ckpt_flags(sbi, CP_ERROR_FLAG))
f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
/* ino == 0, if f2fs_new_inode() failed */
if (inode->i_ino)
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino,
......
......@@ -58,6 +58,13 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
goto fail;
}
if (f2fs_sb_has_project_quota(sbi->sb) &&
(F2FS_I(dir)->i_flags & FS_PROJINHERIT_FL))
F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
else
F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns,
F2FS_DEF_PROJID);
err = dquot_initialize(inode);
if (err)
goto fail_drop;
......@@ -72,6 +79,11 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
set_inode_flag(inode, FI_NEW_INODE);
if (f2fs_sb_has_extra_attr(sbi->sb)) {
set_inode_flag(inode, FI_EXTRA_ATTR);
F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
}
if (test_opt(sbi, INLINE_XATTR))
set_inode_flag(inode, FI_INLINE_XATTR);
if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
......@@ -85,6 +97,15 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
stat_inc_inline_inode(inode);
stat_inc_inline_dir(inode);
F2FS_I(inode)->i_flags =
f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED);
if (S_ISDIR(inode->i_mode))
F2FS_I(inode)->i_flags |= FS_INDEX_FL;
if (F2FS_I(inode)->i_flags & FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
trace_f2fs_new_inode(inode, 0);
return inode;
......@@ -204,6 +225,11 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
!fscrypt_has_permitted_context(dir, inode))
return -EPERM;
if (is_inode_flag_set(dir, FI_PROJ_INHERIT) &&
(!projid_eq(F2FS_I(dir)->i_projid,
F2FS_I(old_dentry->d_inode)->i_projid)))
return -EXDEV;
err = dquot_initialize(dir);
if (err)
return err;
......@@ -261,6 +287,10 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
return 0;
}
err = dquot_initialize(dir);
if (err)
return err;
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
......@@ -724,6 +754,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out;
}
if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
(!projid_eq(F2FS_I(new_dir)->i_projid,
F2FS_I(old_dentry->d_inode)->i_projid)))
return -EXDEV;
err = dquot_initialize(old_dir);
if (err)
goto out;
......@@ -912,6 +947,14 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
!fscrypt_has_permitted_context(old_dir, new_inode)))
return -EPERM;
if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
!projid_eq(F2FS_I(new_dir)->i_projid,
F2FS_I(old_dentry->d_inode)->i_projid)) ||
(is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
!projid_eq(F2FS_I(old_dir)->i_projid,
F2FS_I(new_dentry->d_inode)->i_projid)))
return -EXDEV;
err = dquot_initialize(old_dir);
if (err)
goto out;
......
......@@ -19,6 +19,7 @@
#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "xattr.h"
#include "trace.h"
#include <trace/events/f2fs.h>
......@@ -554,7 +555,7 @@ static int get_node_path(struct inode *inode, long block,
level = 3;
goto got;
} else {
BUG();
return -E2BIG;
}
got:
return level;
......@@ -578,6 +579,8 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
int err = 0;
level = get_node_path(dn->inode, index, offset, noffset);
if (level < 0)
return level;
nids[0] = dn->inode->i_ino;
npage[0] = dn->inode_page;
......@@ -613,7 +616,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
}
dn->nid = nids[i];
npage[i] = new_node_page(dn, noffset[i], NULL);
npage[i] = new_node_page(dn, noffset[i]);
if (IS_ERR(npage[i])) {
alloc_nid_failed(sbi, nids[i]);
err = PTR_ERR(npage[i]);
......@@ -654,7 +657,8 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
dn->nid = nids[level];
dn->ofs_in_node = offset[level];
dn->node_page = npage[level];
dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
dn->data_blkaddr = datablock_addr(dn->inode,
dn->node_page, dn->ofs_in_node);
return 0;
release_pages:
......@@ -876,6 +880,8 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
trace_f2fs_truncate_inode_blocks_enter(inode, from);
level = get_node_path(inode, from, offset, noffset);
if (level < 0)
return level;
page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
......@@ -1022,11 +1028,10 @@ struct page *new_inode_page(struct inode *inode)
set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
/* caller should f2fs_put_page(page, 1); */
return new_node_page(&dn, 0, NULL);
return new_node_page(&dn, 0);
}
struct page *new_node_page(struct dnode_of_data *dn,
unsigned int ofs, struct page *ipage)
struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct node_info new_ni;
......@@ -1170,6 +1175,11 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
err = -EIO;
goto out_err;
}
if (!f2fs_inode_chksum_verify(sbi, page)) {
err = -EBADMSG;
goto out_err;
}
page_hit:
if(unlikely(nid != nid_of_node(page))) {
f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, "
......@@ -1177,9 +1187,9 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
nid, nid_of_node(page), ino_of_node(page),
ofs_of_node(page), cpver_of_node(page),
next_blkaddr_of_node(page));
ClearPageUptodate(page);
err = -EINVAL;
out_err:
ClearPageUptodate(page);
f2fs_put_page(page, 1);
return ERR_PTR(err);
}
......@@ -1326,7 +1336,8 @@ static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
}
static int __write_node_page(struct page *page, bool atomic, bool *submitted,
struct writeback_control *wbc)
struct writeback_control *wbc, bool do_balance,
enum iostat_type io_type)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
nid_t nid;
......@@ -1339,6 +1350,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
.page = page,
.encrypted_page = NULL,
.submitted = false,
.io_type = io_type,
};
trace_f2fs_writepage(page, NODE);
......@@ -1395,6 +1407,8 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
if (submitted)
*submitted = fio.submitted;
if (do_balance)
f2fs_balance_fs(sbi, false);
return 0;
redirty_out:
......@@ -1405,7 +1419,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
/*
 * Thin wrapper that writes a single node page through __write_node_page().
 * It always passes atomic == false and a NULL 'submitted' pointer, and
 * returns whatever __write_node_page() returns.
 */
static int f2fs_write_node_page(struct page *page,
struct writeback_control *wbc)
{
return __write_node_page(page, false, NULL, wbc);
return __write_node_page(page, false, NULL, wbc, false, FS_NODE_IO);
}
int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
......@@ -1493,7 +1507,8 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
ret = __write_node_page(page, atomic &&
page == last_page,
&submitted, wbc);
&submitted, wbc, true,
FS_NODE_IO);
if (ret) {
unlock_page(page);
f2fs_put_page(last_page, 0);
......@@ -1530,7 +1545,8 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
return ret ? -EIO: 0;
}
int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc)
int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc,
bool do_balance, enum iostat_type io_type)
{
pgoff_t index, end;
struct pagevec pvec;
......@@ -1608,7 +1624,8 @@ int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc)
set_fsync_mark(page, 0);
set_dentry_mark(page, 0);
ret = __write_node_page(page, false, &submitted, wbc);
ret = __write_node_page(page, false, &submitted,
wbc, do_balance, io_type);
if (ret)
unlock_page(page);
else if (submitted)
......@@ -1697,7 +1714,7 @@ static int f2fs_write_node_pages(struct address_space *mapping,
diff = nr_pages_to_write(sbi, NODE, wbc);
wbc->sync_mode = WB_SYNC_NONE;
blk_start_plug(&plug);
sync_node_pages(sbi, wbc);
sync_node_pages(sbi, wbc, true, FS_NODE_IO);
blk_finish_plug(&plug);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
return 0;
......@@ -2191,7 +2208,8 @@ int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
nid_t new_xnid = nid_of_node(page);
nid_t new_xnid;
struct dnode_of_data dn;
struct node_info ni;
struct page *xpage;
......@@ -2207,22 +2225,22 @@ int recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
recover_xnid:
/* 2: update xattr nid in inode */
remove_free_nid(sbi, new_xnid);
f2fs_i_xnid_write(inode, new_xnid);
if (unlikely(inc_valid_node_count(sbi, inode, false)))
f2fs_bug_on(sbi, 1);
if (!alloc_nid(sbi, &new_xnid))
return -ENOSPC;
set_new_dnode(&dn, inode, NULL, NULL, new_xnid);
xpage = new_node_page(&dn, XATTR_NODE_OFFSET);
if (IS_ERR(xpage)) {
alloc_nid_failed(sbi, new_xnid);
return PTR_ERR(xpage);
}
alloc_nid_done(sbi, new_xnid);
update_inode_page(inode);
/* 3: update and set xattr node page dirty */
xpage = grab_cache_page(NODE_MAPPING(sbi), new_xnid);
if (!xpage)
return -ENOMEM;
memcpy(F2FS_NODE(xpage), F2FS_NODE(page), PAGE_SIZE);
memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE);
get_node_info(sbi, new_xnid, &ni);
ni.ino = inode->i_ino;
set_node_addr(sbi, &ni, NEW_ADDR, false);
set_page_dirty(xpage);
f2fs_put_page(xpage, 1);
......@@ -2262,7 +2280,14 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
dst->i_blocks = cpu_to_le64(1);
dst->i_links = cpu_to_le32(1);
dst->i_xattr_nid = 0;
dst->i_inline = src->i_inline & F2FS_INLINE_XATTR;
dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR);
if (dst->i_inline & F2FS_EXTRA_ATTR) {
dst->i_extra_isize = src->i_extra_isize;
if (f2fs_sb_has_project_quota(sbi->sb) &&
F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
i_projid))
dst->i_projid = src->i_projid;
}
new_ni = old_ni;
new_ni.ino = ino;
......
......@@ -69,20 +69,34 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
}
/*
 * Look up inode @ino on sbi->sb and append a new fsync_inode_entry for it
 * to the tail of @head.
 *
 * dquot_initialize() is called on the inode first; when @quota_inode is
 * true, dquot_alloc_inode() is called on it as well.
 *
 * Returns the new entry on success.  On failure returns an ERR_PTR: either
 * the error from f2fs_iget_retry(), or the dquot error, in which case the
 * inode reference is released with iput() before returning.
 */
static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
struct list_head *head, nid_t ino)
struct list_head *head, nid_t ino, bool quota_inode)
{
struct inode *inode;
struct fsync_inode_entry *entry;
int err;
inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
/* from here on every failure must go through err_out to iput() */
err = dquot_initialize(inode);
if (err)
goto err_out;
if (quota_inode) {
err = dquot_alloc_inode(inode);
if (err)
goto err_out;
}
/*
 * NOTE(review): the result is dereferenced without a NULL check;
 * presumably f2fs_kmem_cache_alloc() cannot fail -- confirm.
 */
entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
/* no iput() on success: the reference is kept in entry->inode */
entry->inode = inode;
list_add_tail(&entry->list, head);
return entry;
err_out:
iput(inode);
return ERR_PTR(err);
}
static void del_fsync_inode(struct fsync_inode_entry *entry)
......@@ -107,7 +121,8 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
entry = get_fsync_inode(dir_list, pino);
if (!entry) {
entry = add_fsync_inode(F2FS_I_SB(inode), dir_list, pino);
entry = add_fsync_inode(F2FS_I_SB(inode), dir_list,
pino, false);
if (IS_ERR(entry)) {
dir = ERR_CAST(entry);
err = PTR_ERR(entry);
......@@ -140,6 +155,13 @@ static int recover_dentry(struct inode *inode, struct page *ipage,
err = -EEXIST;
goto out_unmap_put;
}
err = dquot_initialize(einode);
if (err) {
iput(einode);
goto out_unmap_put;
}
err = acquire_orphan_inode(F2FS_I_SB(inode));
if (err) {
iput(einode);
......@@ -226,18 +248,22 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
entry = get_fsync_inode(head, ino_of_node(page));
if (!entry) {
bool quota_inode = false;
if (!check_only &&
IS_INODE(page) && is_dent_dnode(page)) {
err = recover_inode_page(sbi, page);
if (err)
break;
quota_inode = true;
}
/*
* CP | dnode(F) | inode(DF)
* For this case, we should not give up now.
*/
entry = add_fsync_inode(sbi, head, ino_of_node(page));
entry = add_fsync_inode(sbi, head, ino_of_node(page),
quota_inode);
if (IS_ERR(entry)) {
err = PTR_ERR(entry);
if (err == -ENOENT) {
......@@ -291,7 +317,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
return 0;
/* Get the previous summary */
for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
if (curseg->segno == segno) {
sum = curseg->sum_blk->entries[blkoff];
......@@ -328,10 +354,18 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
f2fs_put_page(node_page, 1);
if (ino != dn->inode->i_ino) {
int ret;
/* Deallocate previous index in the node page */
inode = f2fs_iget_retry(sbi->sb, ino);
if (IS_ERR(inode))
return PTR_ERR(inode);
ret = dquot_initialize(inode);
if (ret) {
iput(inode);
return ret;
}
} else {
inode = dn->inode;
}
......@@ -361,7 +395,8 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
return 0;
truncate_out:
if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
if (datablock_addr(tdn.inode, tdn.node_page,
tdn.ofs_in_node) == blkaddr)
truncate_data_blocks_range(&tdn, 1);
if (dn->inode->i_ino == nid && !dn->inode_page_locked)
unlock_page(dn->inode_page);
......@@ -414,8 +449,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
for (; start < end; start++, dn.ofs_in_node++) {
block_t src, dest;
src = datablock_addr(dn.node_page, dn.ofs_in_node);
dest = datablock_addr(page, dn.ofs_in_node);
src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
dest = datablock_addr(dn.inode, page, dn.ofs_in_node);
/* skip recovering if dest is the same as src */
if (src == dest)
......@@ -557,12 +592,27 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
struct list_head dir_list;
int err;
int ret = 0;
unsigned long s_flags = sbi->sb->s_flags;
bool need_writecp = false;
if (s_flags & MS_RDONLY) {
f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
sbi->sb->s_flags &= ~MS_RDONLY;
}
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
sbi->sb->s_flags |= MS_ACTIVE;
/* Turn on quotas so that they are updated correctly */
f2fs_enable_quota_files(sbi);
#endif
fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
sizeof(struct fsync_inode_entry));
if (!fsync_entry_slab)
return -ENOMEM;
if (!fsync_entry_slab) {
err = -ENOMEM;
goto out;
}
INIT_LIST_HEAD(&inode_list);
INIT_LIST_HEAD(&dir_list);
......@@ -573,11 +623,11 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
/* step #1: find fsynced inode numbers */
err = find_fsync_dnodes(sbi, &inode_list, check_only);
if (err || list_empty(&inode_list))
goto out;
goto skip;
if (check_only) {
ret = 1;
goto out;
goto skip;
}
need_writecp = true;
......@@ -586,7 +636,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
err = recover_data(sbi, &inode_list, &dir_list);
if (!err)
f2fs_bug_on(sbi, !list_empty(&inode_list));
out:
skip:
destroy_fsync_dnodes(&inode_list);
/* truncate meta pages to be used by the recovery */
......@@ -599,8 +649,6 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
}
clear_sbi_flag(sbi, SBI_POR_DOING);
if (err)
set_ckpt_flags(sbi, CP_ERROR_FLAG);
mutex_unlock(&sbi->cp_mutex);
/* let's drop all the directory inodes for clean checkpoint */
......@@ -614,5 +662,12 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
}
kmem_cache_destroy(fsync_entry_slab);
out:
#ifdef CONFIG_QUOTA
/* Turn quotas off */
f2fs_quota_off_umount(sbi->sb);
#endif
sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
return ret ? ret: err;
}
This diff is collapsed.
......@@ -492,29 +492,11 @@ static inline int overprovision_segments(struct f2fs_sb_info *sbi)
return SM_I(sbi)->ovp_segments;
}
/* Convert overprovision_segments() to a section count via GET_SEC_FROM_SEG(). */
static inline int overprovision_sections(struct f2fs_sb_info *sbi)
{
	unsigned int ovp_segs = (unsigned int)overprovision_segments(sbi);

	return GET_SEC_FROM_SEG(sbi, ovp_segs);
}
/* Convert reserved_segments() to a section count via GET_SEC_FROM_SEG(). */
static inline int reserved_sections(struct f2fs_sb_info *sbi)
{
	unsigned int rsvd_segs = (unsigned int)reserved_segments(sbi);

	return GET_SEC_FROM_SEG(sbi, rsvd_segs);
}
/*
 * Report whether free sections have dropped to (or below) the threshold
 *   dirty node secs + 2 * dirty dentry secs + dirty imeta secs
 *   + 2 * reserved sections.
 * Always false when the LFS mount option is set.
 */
static inline bool need_SSR(struct f2fs_sb_info *sbi)
{
	int dirty_node = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
	int dirty_dent = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
	int dirty_imeta = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
	int threshold;

	if (test_opt(sbi, LFS))
		return false;

	threshold = dirty_node + 2 * dirty_dent + dirty_imeta +
			2 * reserved_sections(sbi);
	return free_sections(sbi) <= threshold;
}
static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
int freed, int needed)
{
......@@ -577,6 +559,10 @@ static inline bool need_inplace_update_policy(struct inode *inode,
if (test_opt(sbi, LFS))
return false;
/* if this is cold file, we should overwrite to avoid fragmentation */
if (file_is_cold(inode))
return true;
if (policy & (0x1 << F2FS_IPU_FORCE))
return true;
if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi))
......@@ -799,3 +785,28 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
wbc->nr_to_write = desired;
return desired - nr_to_write;
}
/*
 * Set dcc->discard_wake and wake every waiter on dcc->discard_wait_queue.
 *
 * With @force the wake-up is unconditional.  Otherwise the pending lists
 * are scanned under dcc->cmd_lock from index MAX_PLIST_NUM - 1 downward,
 * for as long as plist_issue() accepts the list's tag; the wake-up happens
 * only if one of the scanned lists is non-empty.
 */
static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	bool need_wake = force;
	int idx;

	if (!force) {
		mutex_lock(&dcc->cmd_lock);
		idx = MAX_PLIST_NUM - 1;
		while (idx >= 0 && plist_issue(dcc->pend_list_tag[idx])) {
			if (!list_empty(&dcc->pend_list[idx])) {
				need_wake = true;
				break;
			}
			idx--;
		}
		mutex_unlock(&dcc->cmd_lock);
	}

	if (need_wake) {
		dcc->discard_wake = 1;
		wake_up_interruptible_all(&dcc->discard_wait_queue);
	}
}
This diff is collapsed.
This diff is collapsed.
......@@ -442,7 +442,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
} else {
struct dnode_of_data dn;
set_new_dnode(&dn, inode, NULL, NULL, new_nid);
xpage = new_node_page(&dn, XATTR_NODE_OFFSET, ipage);
xpage = new_node_page(&dn, XATTR_NODE_OFFSET);
if (IS_ERR(xpage)) {
alloc_nid_failed(sbi, new_nid);
return PTR_ERR(xpage);
......@@ -473,8 +473,10 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (len > F2FS_NAME_LEN)
return -ERANGE;
down_read(&F2FS_I(inode)->i_xattr_sem);
error = lookup_all_xattrs(inode, ipage, index, len, name,
&entry, &base_addr);
up_read(&F2FS_I(inode)->i_xattr_sem);
if (error)
return error;
......@@ -503,7 +505,9 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
int error = 0;
size_t rest = buffer_size;
down_read(&F2FS_I(inode)->i_xattr_sem);
error = read_all_xattrs(inode, NULL, &base_addr);
up_read(&F2FS_I(inode)->i_xattr_sem);
if (error)
return error;
......@@ -686,7 +690,9 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
f2fs_lock_op(sbi);
/* protect xattr_ver */
down_write(&F2FS_I(inode)->i_sem);
down_write(&F2FS_I(inode)->i_xattr_sem);
err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags);
up_write(&F2FS_I(inode)->i_xattr_sem);
up_write(&F2FS_I(inode)->i_sem);
f2fs_unlock_op(sbi);
......
......@@ -186,6 +186,8 @@ struct f2fs_extent {
#define F2FS_NAME_LEN 255
#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */
#define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */
#define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \
get_extra_isize(inode))
#define DEF_NIDS_PER_INODE 5 /* Node IDs in an Inode */
#define ADDRS_PER_INODE(inode) addrs_per_inode(inode)
#define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */
......@@ -205,9 +207,7 @@ struct f2fs_extent {
#define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */
#define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */
#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */
#define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \
F2FS_INLINE_XATTR_ADDRS - 1))
#define F2FS_EXTRA_ATTR 0x20 /* file having extra attribute */
struct f2fs_inode {
__le16 i_mode; /* file mode */
......@@ -235,8 +235,16 @@ struct f2fs_inode {
struct f2fs_extent i_ext; /* caching a largest extent */
__le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */
union {
struct {
__le16 i_extra_isize; /* extra inode attribute size */
__le16 i_padding; /* padding */
__le32 i_projid; /* project id */
__le32 i_inode_checksum;/* inode meta checksum */
__le32 i_extra_end[0]; /* for attribute size calculation */
};
__le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */
};
__le32 i_nid[DEF_NIDS_PER_INODE]; /* direct(2), indirect(2),
double_indirect(1) node id */
} __packed;
......@@ -465,7 +473,7 @@ typedef __le32 f2fs_hash_t;
#define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1))
/*
* space utilization of regular dentry and inline dentry
* space utilization of regular dentry and inline dentry (w/o extra reservation)
* regular dentry inline dentry
* bitmap 1 * 27 = 27 1 * 23 = 23
* reserved 1 * 3 = 3 1 * 7 = 7
......@@ -501,24 +509,6 @@ struct f2fs_dentry_block {
__u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN];
} __packed;
/*
 * Sizing of an inline directory, i.e. a dentry block laid out inside the
 * MAX_INLINE_DATA bytes of inline data.
 */
/*
 * Number of inline dentries: each one costs SIZE_OF_DIR_ENTRY bytes for the
 * entry, F2FS_SLOT_LEN bytes for its filename slot, and one bitmap bit, so
 * divide the available bits by the per-dentry bit cost.
 */
#define NR_INLINE_DENTRY (MAX_INLINE_DATA * BITS_PER_BYTE / \
((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
BITS_PER_BYTE + 1))
/* Bytes needed for one bit per inline dentry, rounded up to a whole byte. */
#define INLINE_DENTRY_BITMAP_SIZE ((NR_INLINE_DENTRY + \
BITS_PER_BYTE - 1) / BITS_PER_BYTE)
/* Bytes of MAX_INLINE_DATA left over after entries, filenames and bitmap. */
#define INLINE_RESERVED_SIZE (MAX_INLINE_DATA - \
((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
NR_INLINE_DENTRY + INLINE_DENTRY_BITMAP_SIZE))
/*
 * Inline directory entry structure: bitmap, padding, then parallel arrays
 * of NR_INLINE_DENTRY entries and filename slots.
 * NOTE(review): the fields sum to MAX_INLINE_DATA only if
 * sizeof(struct f2fs_dir_entry) == SIZE_OF_DIR_ENTRY -- confirm.
 */
struct f2fs_inline_dentry {
__u8 dentry_bitmap[INLINE_DENTRY_BITMAP_SIZE];
__u8 reserved[INLINE_RESERVED_SIZE];
struct f2fs_dir_entry dentry[NR_INLINE_DENTRY];
__u8 filename[NR_INLINE_DENTRY][F2FS_SLOT_LEN];
} __packed;
/* file types used in inode_info->flags */
enum {
F2FS_FT_UNKNOWN,
......@@ -534,4 +524,6 @@ enum {
#define S_SHIFT 12
#define F2FS_DEF_PROJID 0 /* default project ID */
#endif /* _LINUX_F2FS_FS_H */
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment