Commit 25c4e6c3 authored by Linus Torvalds

Merge tag 'for-f2fs-4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "This round introduces several interesting features such as on-disk NAT
  bitmaps, IO alignment, and a discard thread. And it includes a couple
  of major bug fixes as below.

  Enhancements:

   - introduce on-disk bitmaps to avoid scanning NAT blocks when getting
     free nids

   - support IO alignment to prepare open-channel SSD integration in
     future

   - introduce a discard thread to avoid long latency during checkpoint
     and fstrim

   - use SSR for warm node and enable inline_xattr by default

   - introduce in-memory bitmaps to check FS consistency for debugging

   - improve write_begin by avoiding needless read IO

  Bug fixes:

   - fix broken zone_reset behavior for SMR drive

   - fix wrong victim selection policy during GC

   - fix missing behavior when preparing discard commands

   - fix bugs in atomic write support and fiemap

   - workaround to handle multiple f2fs_add_link calls having same name

  ... and it includes a bunch of clean-up patches as well"

* tag 'for-f2fs-4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (97 commits)
  f2fs: avoid to flush nat journal entries
  f2fs: avoid to issue redundant discard commands
  f2fs: fix a plint compile warning
  f2fs: add f2fs_drop_inode tracepoint
  f2fs: Fix zoned block device support
  f2fs: remove redundant set_page_dirty()
  f2fs: fix to enlarge size of write_io_dummy mempool
  f2fs: fix memory leak of write_io_dummy mempool during umount
  f2fs: fix to update F2FS_{CP_}WB_DATA count correctly
  f2fs: use MAX_FREE_NIDS for the free nids target
  f2fs: introduce free nid bitmap
  f2fs: new helper cur_cp_crc() getting crc in f2fs_checkpoint
  f2fs: update the comment of default nr_pages to skipping
  f2fs: drop the duplicate pval in f2fs_getxattr
  f2fs: Don't update the xattr data that same as the exist
  f2fs: kill __is_extent_same
  f2fs: avoid bggc->fggc when enough free segments are avaliable after cp
  f2fs: select target segment with closer temperature in SSR mode
  f2fs: show simple call stack in fault injection message
  f2fs: no need lock_op in f2fs_write_inline_data
  ...
parents 6053dc98 900f7362
......@@ -125,13 +125,14 @@ active_logs=%u Support configuring the number of active logs. In the
disable_ext_identify Disable the extension list configured by mkfs, so f2fs
is not aware of cold files such as media files.
inline_xattr Enable the inline xattrs feature.
noinline_xattr Disable the inline xattrs feature.
inline_data Enable the inline data feature: New created small(<~3.4k)
files can be written into inode block.
inline_dentry Enable the inline dir feature: data in new created
directory entries can be written into inode block. The
space of inode block which is used to store inline
dentries is limited to ~3.4k.
noinline_dentry Diable the inline dentry feature.
noinline_dentry Disable the inline dentry feature.
flush_merge Merge concurrent cache_flush commands as much as possible
to eliminate redundant command issues. If the underlying
device handles the cache_flush command relatively slowly,
......@@ -157,6 +158,8 @@ data_flush Enable data flushing before checkpoint in order to
mode=%s Control block allocation mode which supports "adaptive"
and "lfs". In "lfs" mode, there should be no random
writes towards main area.
io_bits=%u Set the bit size of write IO requests. It should be set
with "mode=lfs".
================================================================================
DEBUGFS ENTRIES
......@@ -174,7 +177,7 @@ f2fs. Each file shows the whole f2fs information.
SYSFS ENTRIES
================================================================================
Information about mounted f2f2 file systems can be found in
Information about mounted f2fs file systems can be found in
/sys/fs/f2fs. Each mounted filesystem will have a directory in
/sys/fs/f2fs based on its device name (i.e., /sys/fs/f2fs/sda).
The files in each per-device directory are shown in table below.
......
......@@ -249,7 +249,8 @@ static int f2fs_write_meta_page(struct page *page,
dec_page_count(sbi, F2FS_DIRTY_META);
if (wbc->for_reclaim)
f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE);
f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
0, page->index, META, WRITE);
unlock_page(page);
......@@ -493,6 +494,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_ORPHAN)) {
spin_unlock(&im->ino_lock);
f2fs_show_injection_info(FAULT_ORPHAN);
return -ENOSPC;
}
#endif
......@@ -681,8 +683,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
return -EINVAL;
}
crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block
+ crc_offset)));
crc = cur_cp_crc(*cp_block);
if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
return -EINVAL;
......@@ -891,7 +892,7 @@ int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
return 0;
}
fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
fi = list_first_entry(head, struct f2fs_inode_info, dirty_list);
inode = igrab(&fi->vfs_inode);
spin_unlock(&sbi->inode_lock[type]);
if (inode) {
......@@ -924,7 +925,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
spin_unlock(&sbi->inode_lock[DIRTY_META]);
return 0;
}
fi = list_entry(head->next, struct f2fs_inode_info,
fi = list_first_entry(head, struct f2fs_inode_info,
gdirty_list);
inode = igrab(&fi->vfs_inode);
spin_unlock(&sbi->inode_lock[DIRTY_META]);
......@@ -998,8 +999,6 @@ static int block_operations(struct f2fs_sb_info *sbi)
static void unblock_operations(struct f2fs_sb_info *sbi)
{
up_write(&sbi->node_write);
build_free_nids(sbi, false);
f2fs_unlock_all(sbi);
}
......@@ -1025,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
spin_lock(&sbi->cp_lock);
if (cpc->reason == CP_UMOUNT && ckpt->cp_pack_total_block_count >
sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
disable_nat_bits(sbi, false);
if (cpc->reason == CP_UMOUNT)
__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
else
......@@ -1137,6 +1140,28 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
start_blk = __start_cp_next_addr(sbi);
/* write nat bits */
if (enabled_nat_bits(sbi, cpc)) {
__u64 cp_ver = cur_cp_version(ckpt);
unsigned int i;
block_t blk;
cp_ver |= ((__u64)crc32 << 32);
*(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++)
update_meta_page(sbi, nm_i->nat_bits +
(i << F2FS_BLKSIZE_BITS), blk + i);
/* Flush all the NAT BITS pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
sync_meta_pages(sbi, META, LONG_MAX);
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
}
}
/* need to wait for end_io results */
wait_on_all_pages_writeback(sbi);
if (unlikely(f2fs_cp_error(sbi)))
......@@ -1248,15 +1273,20 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_flush_merged_bios(sbi);
/* this is the case of multiple fstrims without any changes */
if (cpc->reason == CP_DISCARD && !is_sbi_flag_set(sbi, SBI_IS_DIRTY)) {
f2fs_bug_on(sbi, NM_I(sbi)->dirty_nat_cnt);
f2fs_bug_on(sbi, SIT_I(sbi)->dirty_sentries);
f2fs_bug_on(sbi, prefree_segments(sbi));
flush_sit_entries(sbi, cpc);
clear_prefree_segments(sbi, cpc);
f2fs_wait_all_discard_bio(sbi);
unblock_operations(sbi);
goto out;
if (cpc->reason == CP_DISCARD) {
if (!exist_trim_candidates(sbi, cpc)) {
unblock_operations(sbi);
goto out;
}
if (NM_I(sbi)->dirty_nat_cnt == 0 &&
SIT_I(sbi)->dirty_sentries == 0 &&
prefree_segments(sbi) == 0) {
flush_sit_entries(sbi, cpc);
clear_prefree_segments(sbi, cpc);
unblock_operations(sbi);
goto out;
}
}
/*
......@@ -1268,17 +1298,15 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
/* write cached NAT/SIT entries to NAT/SIT area */
flush_nat_entries(sbi);
flush_nat_entries(sbi, cpc);
flush_sit_entries(sbi, cpc);
/* unlock all the fs_lock[] in do_checkpoint() */
err = do_checkpoint(sbi, cpc);
if (err) {
if (err)
release_discard_addrs(sbi);
} else {
else
clear_prefree_segments(sbi, cpc);
f2fs_wait_all_discard_bio(sbi);
}
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
......
This diff is collapsed.
......@@ -50,8 +50,16 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
si->aw_cnt = atomic_read(&sbi->aw_cnt);
si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
if (SM_I(sbi) && SM_I(sbi)->fcc_info)
si->nr_flush =
atomic_read(&SM_I(sbi)->fcc_info->submit_flush);
if (SM_I(sbi) && SM_I(sbi)->dcc_info)
si->nr_discard =
atomic_read(&SM_I(sbi)->dcc_info->submit_discard);
si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
......@@ -62,6 +70,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->inline_xattr = atomic_read(&sbi->inline_xattr);
si->inline_inode = atomic_read(&sbi->inline_inode);
si->inline_dir = atomic_read(&sbi->inline_dir);
si->append = sbi->im[APPEND_INO].ino_num;
si->update = sbi->im[UPDATE_INO].ino_num;
si->orphans = sbi->im[ORPHAN_INO].ino_num;
si->utilization = utilization(sbi);
......@@ -183,6 +193,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
/* build nm */
si->base_mem += sizeof(struct f2fs_nm_info);
si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
si->base_mem += NM_I(sbi)->nat_blocks / 8;
get_cache:
si->cache_mem = 0;
......@@ -192,8 +205,10 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
si->cache_mem += sizeof(struct f2fs_gc_kthread);
/* build merge flush thread */
if (SM_I(sbi)->cmd_control_info)
if (SM_I(sbi)->fcc_info)
si->cache_mem += sizeof(struct flush_cmd_control);
if (SM_I(sbi)->dcc_info)
si->cache_mem += sizeof(struct discard_cmd_control);
/* free nids */
si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
......@@ -254,8 +269,8 @@ static int stat_show(struct seq_file *s, void *v)
si->inline_inode);
seq_printf(s, " - Inline_dentry Inode: %u\n",
si->inline_dir);
seq_printf(s, " - Orphan Inode: %u\n",
si->orphans);
seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n",
si->orphans, si->append, si->update);
seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
si->main_area_segs, si->main_area_sections,
si->main_area_zones);
......@@ -314,8 +329,11 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n",
si->ext_tree, si->zombie_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - inmem: %4d, wb_cp_data: %4d, wb_data: %4d\n",
si->inmem_pages, si->nr_wb_cp_data, si->nr_wb_data);
seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n",
si->nr_wb_cp_data, si->nr_wb_data,
si->nr_flush, si->nr_discard);
seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d)\n",
si->inmem_pages, si->aw_cnt, si->max_aw_cnt);
seq_printf(s, " - nodes: %4d in %4d\n",
si->ndirty_node, si->node_pages);
seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n",
......@@ -414,6 +432,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
atomic_set(&sbi->inline_dir, 0);
atomic_set(&sbi->inplace_count, 0);
atomic_set(&sbi->aw_cnt, 0);
atomic_set(&sbi->max_aw_cnt, 0);
mutex_lock(&f2fs_stat_mutex);
list_add_tail(&si->stat_list, &f2fs_stat_list);
mutex_unlock(&f2fs_stat_mutex);
......
......@@ -207,9 +207,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
f2fs_put_page(dentry_page, 0);
}
if (!de && room && F2FS_I(dir)->chash != namehash) {
F2FS_I(dir)->chash = namehash;
F2FS_I(dir)->clevel = level;
/* This is to increase the speed of f2fs_create */
if (!de && room) {
F2FS_I(dir)->task = current;
if (F2FS_I(dir)->chash != namehash) {
F2FS_I(dir)->chash = namehash;
F2FS_I(dir)->clevel = level;
}
}
return de;
......@@ -548,8 +552,10 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
start:
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH))
if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) {
f2fs_show_injection_info(FAULT_DIR_DEPTH);
return -ENOSPC;
}
#endif
if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
return -ENOSPC;
......@@ -646,14 +652,34 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
struct inode *inode, nid_t ino, umode_t mode)
{
struct fscrypt_name fname;
struct page *page = NULL;
struct f2fs_dir_entry *de = NULL;
int err;
err = fscrypt_setup_filename(dir, name, 0, &fname);
if (err)
return err;
err = __f2fs_do_add_link(dir, &fname, inode, ino, mode);
/*
* An immature stackable filesystem shows a race condition between lookup
* and create. If we have same task when doing lookup and create, it's
* definitely fine as expected by VFS normally. Otherwise, let's just
* verify on-disk dentry one more time, which guarantees filesystem
* consistency more.
*/
if (current != F2FS_I(dir)->task) {
de = __f2fs_find_entry(dir, &fname, &page);
F2FS_I(dir)->task = NULL;
}
if (de) {
f2fs_dentry_kunmap(dir, page);
f2fs_put_page(page, 0);
err = -EEXIST;
} else if (IS_ERR(page)) {
err = PTR_ERR(page);
} else {
err = __f2fs_do_add_link(dir, &fname, inode, ino, mode);
}
fscrypt_free_filename(&fname);
return err;
}
......
......@@ -77,7 +77,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
struct extent_tree *et;
nid_t ino = inode->i_ino;
down_write(&sbi->extent_tree_lock);
mutex_lock(&sbi->extent_tree_lock);
et = radix_tree_lookup(&sbi->extent_tree_root, ino);
if (!et) {
et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
......@@ -94,7 +94,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
atomic_dec(&sbi->total_zombie_tree);
list_del_init(&et->list);
}
up_write(&sbi->extent_tree_lock);
mutex_unlock(&sbi->extent_tree_lock);
/* never died until evict_inode */
F2FS_I(inode)->extent_tree = et;
......@@ -311,28 +311,24 @@ static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
tmp_node = parent;
if (parent && fofs > en->ei.fofs)
tmp_node = rb_next(parent);
*next_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
*next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
tmp_node = parent;
if (parent && fofs < en->ei.fofs)
tmp_node = rb_prev(parent);
*prev_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
*prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
return NULL;
lookup_neighbors:
if (fofs == en->ei.fofs) {
/* lookup prev node for merging backward later */
tmp_node = rb_prev(&en->rb_node);
*prev_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
*prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
}
if (fofs == en->ei.fofs + en->ei.len - 1) {
/* lookup next node for merging frontward later */
tmp_node = rb_next(&en->rb_node);
*next_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
*next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
}
return en;
}
......@@ -352,11 +348,12 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode,
}
if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
if (en)
__release_extent_node(sbi, et, prev_ex);
next_ex->ei.fofs = ei->fofs;
next_ex->ei.blk = ei->blk;
next_ex->ei.len += ei->len;
if (en)
__release_extent_node(sbi, et, prev_ex);
en = next_ex;
}
......@@ -416,7 +413,7 @@ static struct extent_node *__insert_extent_tree(struct inode *inode,
return en;
}
static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
static void f2fs_update_extent_tree_range(struct inode *inode,
pgoff_t fofs, block_t blkaddr, unsigned int len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
......@@ -429,7 +426,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
unsigned int pos = (unsigned int)fofs;
if (!et)
return false;
return;
trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);
......@@ -437,7 +434,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
write_unlock(&et->lock);
return false;
return;
}
prev = et->largest;
......@@ -492,9 +489,8 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
if (!next_en) {
struct rb_node *node = rb_next(&en->rb_node);
next_en = node ?
rb_entry(node, struct extent_node, rb_node)
: NULL;
next_en = rb_entry_safe(node, struct extent_node,
rb_node);
}
if (parts)
......@@ -535,8 +531,6 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
__free_extent_tree(sbi, et);
write_unlock(&et->lock);
return !__is_extent_same(&prev, &et->largest);
}
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
......@@ -552,7 +546,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
if (!atomic_read(&sbi->total_zombie_tree))
goto free_node;
if (!down_write_trylock(&sbi->extent_tree_lock))
if (!mutex_trylock(&sbi->extent_tree_lock))
goto out;
/* 1. remove unreferenced extent tree */
......@@ -574,11 +568,11 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
goto unlock_out;
cond_resched();
}
up_write(&sbi->extent_tree_lock);
mutex_unlock(&sbi->extent_tree_lock);
free_node:
/* 2. remove LRU extent entries */
if (!down_write_trylock(&sbi->extent_tree_lock))
if (!mutex_trylock(&sbi->extent_tree_lock))
goto out;
remained = nr_shrink - (node_cnt + tree_cnt);
......@@ -608,7 +602,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
spin_unlock(&sbi->extent_lock);
unlock_out:
up_write(&sbi->extent_tree_lock);
mutex_unlock(&sbi->extent_tree_lock);
out:
trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
......@@ -655,10 +649,10 @@ void f2fs_destroy_extent_tree(struct inode *inode)
if (inode->i_nlink && !is_bad_inode(inode) &&
atomic_read(&et->node_cnt)) {
down_write(&sbi->extent_tree_lock);
mutex_lock(&sbi->extent_tree_lock);
list_add_tail(&et->list, &sbi->zombie_list);
atomic_inc(&sbi->total_zombie_tree);
up_write(&sbi->extent_tree_lock);
mutex_unlock(&sbi->extent_tree_lock);
return;
}
......@@ -666,12 +660,12 @@ void f2fs_destroy_extent_tree(struct inode *inode)
node_cnt = f2fs_destroy_extent_node(inode);
/* delete extent tree entry in radix tree */
down_write(&sbi->extent_tree_lock);
mutex_lock(&sbi->extent_tree_lock);
f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
kmem_cache_free(extent_tree_slab, et);
atomic_dec(&sbi->total_ext_tree);
up_write(&sbi->extent_tree_lock);
mutex_unlock(&sbi->extent_tree_lock);
F2FS_I(inode)->extent_tree = NULL;
......@@ -718,7 +712,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
void init_extent_cache_info(struct f2fs_sb_info *sbi)
{
INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
init_rwsem(&sbi->extent_tree_lock);
mutex_init(&sbi->extent_tree_lock);
INIT_LIST_HEAD(&sbi->extent_list);
spin_lock_init(&sbi->extent_lock);
atomic_set(&sbi->total_ext_tree, 0);
......
This diff is collapsed.
......@@ -20,6 +20,7 @@
#include <linux/uaccess.h>
#include <linux/mount.h>
#include <linux/pagevec.h>
#include <linux/uio.h>
#include <linux/uuid.h>
#include <linux/file.h>
......@@ -140,8 +141,6 @@ static inline bool need_do_checkpoint(struct inode *inode)
need_cp = true;
else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
need_cp = true;
else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
need_cp = true;
else if (test_opt(sbi, FASTBOOT))
need_cp = true;
else if (sbi->active_logs == 2)
......@@ -167,7 +166,6 @@ static void try_to_fix_pino(struct inode *inode)
nid_t pino;
down_write(&fi->i_sem);
fi->xattr_ver = 0;
if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
get_parent_ino(inode, &pino)) {
f2fs_i_pino_write(inode, pino);
......@@ -276,7 +274,8 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
flush_out:
remove_ino_entry(sbi, ino, UPDATE_INO);
clear_inode_flag(inode, FI_UPDATE_WRITE);
ret = f2fs_issue_flush(sbi);
if (!atomic)
ret = f2fs_issue_flush(sbi);
f2fs_update_time(sbi, REQ_TIME);
out:
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
......@@ -567,8 +566,9 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
}
if (f2fs_has_inline_data(inode)) {
if (truncate_inline_inode(ipage, from))
set_page_dirty(ipage);
truncate_inline_inode(ipage, from);
if (from == 0)
clear_inode_flag(inode, FI_DATA_EXIST);
f2fs_put_page(ipage, 1);
truncate_page = true;
goto out;
......@@ -1541,6 +1541,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
if (ret)
clear_inode_flag(inode, FI_ATOMIC_FILE);
out:
stat_inc_atomic_write(inode);
stat_update_max_atomic_write(inode);
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
......@@ -1564,15 +1566,18 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
goto err_out;
if (f2fs_is_atomic_file(inode)) {
clear_inode_flag(inode, FI_ATOMIC_FILE);
ret = commit_inmem_pages(inode);
if (ret) {
set_inode_flag(inode, FI_ATOMIC_FILE);
if (ret)
goto err_out;
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
if (!ret) {
clear_inode_flag(inode, FI_ATOMIC_FILE);
stat_dec_atomic_write(inode);
}
} else {
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
}
ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
err_out:
inode_unlock(inode);
mnt_drop_write_file(filp);
......@@ -1870,7 +1875,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
{
struct inode *inode = file_inode(filp);
struct f2fs_map_blocks map = { .m_next_pgofs = NULL };
struct extent_info ei;
struct extent_info ei = {0,0,0};
pgoff_t pg_start, pg_end;
unsigned int blk_per_seg = sbi->blocks_per_seg;
unsigned int total = 0, sec_num;
......@@ -2250,8 +2255,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret > 0) {
int err = f2fs_preallocate_blocks(iocb, from);
int err;
if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
set_inode_flag(inode, FI_NO_PREALLOC);
err = f2fs_preallocate_blocks(iocb, from);
if (err) {
inode_unlock(inode);
return err;
......@@ -2259,6 +2268,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
blk_finish_plug(&plug);
clear_inode_flag(inode, FI_NO_PREALLOC);
}
inode_unlock(inode);
......
......@@ -48,8 +48,10 @@ static int gc_thread_func(void *data)
}
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_CHECKPOINT))
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
#endif
/*
......@@ -166,7 +168,8 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->ofs_unit = sbi->segs_per_sec;
}
if (p->max_search > sbi->max_victim_search)
/* we need to check every dirty segments in the FG_GC case */
if (gc_type != FG_GC && p->max_search > sbi->max_victim_search)
p->max_search = sbi->max_victim_search;
p->offset = sbi->last_victim[p->gc_mode];
......@@ -199,6 +202,10 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
if (sec_usage_check(sbi, secno))
continue;
if (no_fggc_candidate(sbi, secno))
continue;
clear_bit(secno, dirty_i->victim_secmap);
return secno * sbi->segs_per_sec;
}
......@@ -237,6 +244,16 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}
/*
 * get_greedy_cost - cost of picking @segno as a victim under the greedy policy
 * @sbi:   f2fs superblock info
 * @segno: segment number being evaluated
 *
 * The base cost is whatever get_valid_blocks() reports for @segno over
 * sbi->segs_per_sec.  When IS_DATASEG() holds for the segment entry's type
 * the cost is doubled.
 * NOTE(review): the doubling presumably makes data segments less attractive
 * than node segments as greedy victims -- confirm against the GC policy.
 *
 * Returns the computed cost.
 */
static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi,
						unsigned int segno)
{
	unsigned int cost = get_valid_blocks(sbi, segno, sbi->segs_per_sec);

	if (IS_DATASEG(get_seg_entry(sbi, segno)->type))
		cost *= 2;

	return cost;
}
static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
unsigned int segno, struct victim_sel_policy *p)
{
......@@ -245,7 +262,7 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
/* alloc_mode == LFS */
if (p->gc_mode == GC_GREEDY)
return get_valid_blocks(sbi, segno, sbi->segs_per_sec);
return get_greedy_cost(sbi, segno);
else
return get_cb_cost(sbi, segno);
}
......@@ -322,13 +339,15 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
nsearched++;
}
secno = GET_SECNO(sbi, segno);
if (sec_usage_check(sbi, secno))
goto next;
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
goto next;
if (gc_type == FG_GC && p.alloc_mode == LFS &&
no_fggc_candidate(sbi, secno))
goto next;
cost = get_gc_cost(sbi, segno, &p);
......@@ -569,6 +588,9 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
if (f2fs_is_atomic_file(inode))
goto out;
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
if (err)
......@@ -661,6 +683,9 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
if (!check_valid_map(F2FS_I_SB(inode), segno, off))
goto out;
if (f2fs_is_atomic_file(inode))
goto out;
if (gc_type == BG_GC) {
if (PageWriteback(page))
goto out;
......@@ -921,8 +946,6 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
cpc.reason = __get_cp_reason(sbi);
gc_more:
segno = NULL_SEGNO;
if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
goto stop;
if (unlikely(f2fs_cp_error(sbi))) {
......@@ -930,30 +953,23 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
goto stop;
}
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed, 0)) {
gc_type = FG_GC;
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) {
/*
* If there is no victim and no prefree segment but still not
* enough free sections, we should flush dent/node blocks and do
* garbage collections.
* For example, if there are many prefree_segments below given
* threshold, we can make them free by checkpoint. Then, we
* secure free segments which doesn't need fggc any more.
*/
if (__get_victim(sbi, &segno, gc_type) ||
prefree_segments(sbi)) {
ret = write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
segno = NULL_SEGNO;
} else if (has_not_enough_free_secs(sbi, 0, 0)) {
ret = write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
}
} else if (gc_type == BG_GC && !background) {
/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
goto stop;
ret = write_checkpoint(sbi, &cpc);
if (ret)
goto stop;
if (has_not_enough_free_secs(sbi, 0, 0))
gc_type = FG_GC;
}
if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
if (gc_type == BG_GC && !background)
goto stop;
if (!__get_victim(sbi, &segno, gc_type))
goto stop;
ret = 0;
......@@ -983,5 +999,16 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
/*
 * build_gc_manager - install the victim-selection ops and the FG_GC threshold
 * @sbi: f2fs superblock info
 *
 * Points DIRTY_I(sbi)->v_ops at default_v_ops and stores in
 * sbi->fggc_threshold the threshold of # of valid blocks in a section for
 * victims of FG_GC:
 *
 *   (main - ovp) * blocks_per_sec / (main - resv)
 *
 * where main/ovp/resv are the main, overprovision and reserved segment
 * counts of SM_I(sbi), each shifted left by sbi->log_blocks_per_seg.
 */
void build_gc_manager(struct f2fs_sb_info *sbi)
{
	u64 main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg;
	u64 resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg;
	u64 ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
	u64 blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec;
	u64 numerator = (main_count - ovp_count) * blocks_per_sec;
	u64 denominator = main_count - resv_count;

	DIRTY_I(sbi)->v_ops = &default_v_ops;

	sbi->fggc_threshold = div64_u64(numerator, denominator);
}
......@@ -373,8 +373,10 @@ void f2fs_evict_inode(struct inode *inode)
goto no_delete;
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (time_to_inject(sbi, FAULT_EVICT_INODE))
if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
f2fs_show_injection_info(FAULT_EVICT_INODE);
goto no_delete;
}
#endif
remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
......
......@@ -321,9 +321,9 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
if (err)
goto err_out;
}
if (!IS_ERR(inode) && f2fs_encrypted_inode(dir) &&
(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
!fscrypt_has_permitted_context(dir, inode)) {
if (f2fs_encrypted_inode(dir) &&
(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
!fscrypt_has_permitted_context(dir, inode)) {
bool nokey = f2fs_encrypted_inode(inode) &&
!fscrypt_has_encryption_key(inode);
err = nokey ? -ENOKEY : -EPERM;
......@@ -663,6 +663,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
bool is_old_inline = f2fs_has_inline_dentry(old_dir);
int err = -ENOENT;
if ((f2fs_encrypted_inode(old_dir) &&
!fscrypt_has_encryption_key(old_dir)) ||
(f2fs_encrypted_inode(new_dir) &&
!fscrypt_has_encryption_key(new_dir)))
return -ENOKEY;
if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) &&
!fscrypt_has_permitted_context(new_dir, old_inode)) {
err = -EPERM;
......@@ -843,6 +849,12 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
int old_nlink = 0, new_nlink = 0;
int err = -ENOENT;
if ((f2fs_encrypted_inode(old_dir) &&
!fscrypt_has_encryption_key(old_dir)) ||
(f2fs_encrypted_inode(new_dir) &&
!fscrypt_has_encryption_key(new_dir)))
return -ENOKEY;
if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) &&
(old_dir != new_dir) &&
(!fscrypt_has_permitted_context(new_dir, old_inode) ||
......
This diff is collapsed.
......@@ -174,7 +174,7 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
spin_unlock(&nm_i->nid_list_lock);
return;
}
fnid = list_entry(nm_i->nid_list[FREE_NID_LIST].next,
fnid = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
struct free_nid, list);
*nid = fnid->nid;
spin_unlock(&nm_i->nid_list_lock);
......@@ -186,6 +186,12 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
/*
 * get_nat_bitmap - copy the in-memory NAT bitmap into a caller buffer
 * @sbi:  f2fs superblock info; its node manager (NM_I(sbi)) owns the bitmap
 * @addr: destination buffer; nm_i->bitmap_size bytes are written to it
 *        (the caller must provide at least that much room)
 *
 * With CONFIG_F2FS_CHECK_FS enabled, the bitmap is first compared against
 * nm_i->nat_bitmap_mir and f2fs_bug_on() is raised if the two differ.
 */
static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
#ifdef CONFIG_F2FS_CHECK_FS
/* consistency check: nat_bitmap and nat_bitmap_mir must be identical */
if (memcmp(nm_i->nat_bitmap, nm_i->nat_bitmap_mir,
nm_i->bitmap_size))
f2fs_bug_on(sbi, 1);
#endif
/* hand the caller a copy of nat_bitmap */
memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size);
}
......@@ -228,6 +234,9 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
unsigned int block_off = NAT_BLOCK_OFFSET(start_nid);
f2fs_change_bit(block_off, nm_i->nat_bitmap);
#ifdef CONFIG_F2FS_CHECK_FS
f2fs_change_bit(block_off, nm_i->nat_bitmap_mir);
#endif
}
static inline nid_t ino_of_node(struct page *node_page)
......@@ -291,14 +300,11 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
struct f2fs_node *rn = F2FS_NODE(page);
size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
__u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
__u64 cp_ver = cur_cp_version(ckpt);
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
cp_ver |= (cur_cp_crc(ckpt) << 32);
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
__u64 crc = le32_to_cpu(*((__le32 *)
((unsigned char *)ckpt + crc_offset)));
cp_ver |= (crc << 32);
}
rn->footer.cp_ver = cpu_to_le64(cp_ver);
rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
}
......@@ -306,14 +312,11 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
static inline bool is_recoverable_dnode(struct page *page)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
__u64 cp_ver = cur_cp_version(ckpt);
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
__u64 crc = le32_to_cpu(*((__le32 *)
((unsigned char *)ckpt + crc_offset)));
cp_ver |= (crc << 32);
}
if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
cp_ver |= (cur_cp_crc(ckpt) << 32);
return cp_ver == cpver_of_node(page);
}
......@@ -343,7 +346,7 @@ static inline bool IS_DNODE(struct page *node_page)
unsigned int ofs = ofs_of_node(node_page);
if (f2fs_has_xattr_block(ofs))
return false;
return true;
if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
ofs == 5 + 2 * NIDS_PER_BLOCK)
......
......@@ -378,11 +378,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
if (IS_INODE(page)) {
recover_inline_xattr(inode, page);
} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
/*
* Deprecated; xattr blocks should be found from cold log.
* But, we should remain this for backward compatibility.
*/
recover_xattr_data(inode, page, blkaddr);
err = recover_xattr_data(inode, page, blkaddr);
if (!err)
recovered++;
goto out;
}
......@@ -428,8 +426,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
}
if (!file_keep_isize(inode) &&
(i_size_read(inode) <= (start << PAGE_SHIFT)))
f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT);
(i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
f2fs_i_size_write(inode,
(loff_t)(start + 1) << PAGE_SHIFT);
/*
* dest is reserved block, invalidate src block
......@@ -552,10 +551,8 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
struct list_head inode_list;
struct list_head dir_list;
block_t blkaddr;
int err;
int ret = 0;
bool need_writecp = false;
......@@ -571,8 +568,6 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
/* prevent checkpoint */
mutex_lock(&sbi->cp_mutex);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
/* step #1: find fsynced inode numbers */
err = find_fsync_dnodes(sbi, &inode_list);
if (err || list_empty(&inode_list))
......
This diff is collapsed.
......@@ -164,6 +164,9 @@ struct seg_entry {
unsigned int ckpt_valid_blocks:10; /* # of valid blocks last cp */
unsigned int padding:6; /* padding */
unsigned char *cur_valid_map; /* validity bitmap of blocks */
#ifdef CONFIG_F2FS_CHECK_FS
unsigned char *cur_valid_map_mir; /* mirror of current valid bitmap */
#endif
/*
* # of valid blocks and the validity bitmap stored in the last
* checkpoint pack. This information is used by the SSR mode.
......@@ -186,9 +189,12 @@ struct segment_allocation {
* the page is atomically written, and it is in inmem_pages list.
*/
#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1)
#define DUMMY_WRITTEN_PAGE ((unsigned long)-2)
#define IS_ATOMIC_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
#define IS_DUMMY_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
struct inmem_pages {
struct list_head list;
......@@ -203,6 +209,9 @@ struct sit_info {
block_t sit_blocks; /* # of blocks used by SIT area */
block_t written_valid_blocks; /* # of valid blocks in main area */
char *sit_bitmap; /* SIT bitmap pointer */
#ifdef CONFIG_F2FS_CHECK_FS
char *sit_bitmap_mir; /* SIT bitmap mirror */
#endif
unsigned int bitmap_size; /* SIT bitmap size */
unsigned long *tmp_map; /* bitmap for temporal use */
......@@ -317,6 +326,9 @@ static inline void seg_info_from_raw_sit(struct seg_entry *se,
se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs);
memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
#ifdef CONFIG_F2FS_CHECK_FS
memcpy(se->cur_valid_map_mir, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
#endif
se->type = GET_SIT_TYPE(rs);
se->mtime = le64_to_cpu(rs->mtime);
}
......@@ -414,6 +426,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
void *dst_addr)
{
struct sit_info *sit_i = SIT_I(sbi);
#ifdef CONFIG_F2FS_CHECK_FS
if (memcmp(sit_i->sit_bitmap, sit_i->sit_bitmap_mir,
sit_i->bitmap_size))
f2fs_bug_on(sbi, 1);
#endif
memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size);
}
......@@ -634,6 +652,12 @@ static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
check_seg_range(sbi, start);
#ifdef CONFIG_F2FS_CHECK_FS
if (f2fs_test_bit(offset, sit_i->sit_bitmap) !=
f2fs_test_bit(offset, sit_i->sit_bitmap_mir))
f2fs_bug_on(sbi, 1);
#endif
/* calculate sit block address */
if (f2fs_test_bit(offset, sit_i->sit_bitmap))
blk_addr += sit_i->sit_blocks;
......@@ -659,6 +683,9 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
unsigned int block_off = SIT_BLOCK_OFFSET(start);
f2fs_change_bit(block_off, sit_i->sit_bitmap);
#ifdef CONFIG_F2FS_CHECK_FS
f2fs_change_bit(block_off, sit_i->sit_bitmap_mir);
#endif
}
static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi)
......@@ -689,6 +716,15 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
- (base + 1) + type;
}
/*
 * Compare a section's valid-block count against sbi->fggc_threshold.
 *
 * @sbi:   filesystem instance
 * @secno: section number to check
 *
 * Returns true when get_valid_blocks() for the section, taken over
 * sbi->segs_per_sec segments, is greater than or equal to
 * sbi->fggc_threshold; false otherwise.
 * NOTE(review): judging by the name, "true" presumably means the section
 * should not be picked as a foreground GC victim - confirm against callers.
 */
static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi,
						unsigned int secno)
{
	return get_valid_blocks(sbi, secno, sbi->segs_per_sec) >=
						sbi->fggc_threshold;
}
static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
{
if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno))
......@@ -700,8 +736,8 @@ static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
* It is very important to gather dirty pages and write at once, so that we can
* submit a big bio without interfering other data writes.
* By default, 512 pages for directory data,
* 512 pages (2MB) * 3 for three types of nodes, and
* max_bio_blocks for meta are set.
* 512 pages (2MB) * 8 for nodes, and
* 256 pages * 8 for meta are set.
*/
static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
{
......
This diff is collapsed.
......@@ -217,6 +217,112 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
return entry;
}
/*
 * Search the inline xattr area for an entry matching (index, name).
 *
 * @base_addr: start of the buffer holding the xattr data
 * @last_addr: out parameter; set to the entry at which the walk stopped
 *             when the walk would cross the end of the inline area
 * @index:     name index to match against e_name_index
 * @len:       length of @name, matched against e_name_len
 * @name:      attribute name bytes (compared with memcmp, @len bytes)
 *
 * Returns NULL (with *last_addr set) when either the current entry or the
 * entry following it would extend past the inline area. Otherwise returns
 * the matching entry, or the entry that terminated the walk when nothing
 * matched.
 */
static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr,
					void **last_addr, int index,
					size_t len, const char *name)
{
	unsigned int inline_bytes = F2FS_INLINE_XATTR_ADDRS << 2;
	void *inline_end = base_addr + inline_bytes;
	struct f2fs_xattr_entry *cur;

	list_for_each_xattr(cur, base_addr) {
		/* stop before touching anything beyond the inline area */
		if ((void *)cur + sizeof(__u32) > inline_end ||
			(void *)XATTR_NEXT_ENTRY(cur) + sizeof(__u32) >
							inline_end) {
			*last_addr = cur;
			return NULL;
		}

		if (cur->e_name_index == index &&
				cur->e_name_len == len &&
				!memcmp(cur->e_name, name, len))
			break;
	}

	return cur;
}
/*
 * Load an inode's xattr data into a temporary buffer and look up one entry.
 *
 * @inode:     inode whose extended attributes are searched
 * @ipage:     optional inode page; when NULL the inode page is fetched
 *             with get_node_page()
 * @index:     name index to look for
 * @len:       length of @name
 * @name:      attribute name
 * @xe:        out: the entry that was found
 * @base_addr: out: the temporary buffer that *xe points into
 *
 * Returns 0 on success. In that case the buffer is NOT freed here, so the
 * caller is expected to release *base_addr. Returns -ENODATA when the inode
 * has neither inline xattrs nor an xattr node, or when no entry matches;
 * -ENOMEM when the buffer cannot be allocated; or the error carried by a
 * failed get_node_page(). On every error path the buffer is freed.
 */
static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
unsigned int index, unsigned int len,
const char *name, struct f2fs_xattr_entry **xe,
void **base_addr)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
void *cur_addr, *txattr_addr, *last_addr = NULL;
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
/* bytes to copy from the xattr node block; 0 when there is no such block */
unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
unsigned int inline_size = 0;
int err = 0;
inline_size = inline_xattr_size(inode);
/* nothing stored inline and no xattr node: there is nothing to search */
if (!size && !inline_size)
return -ENODATA;
/* one buffer for inline data + node block data + one extra __u32 */
txattr_addr = kzalloc(inline_size + size + sizeof(__u32),
GFP_F2FS_ZERO);
if (!txattr_addr)
return -ENOMEM;
/* read from inline xattr */
if (inline_size) {
struct page *page = NULL;
void *inline_addr;
if (ipage) {
inline_addr = inline_xattr_addr(ipage);
} else {
page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
err = PTR_ERR(page);
goto out;
}
inline_addr = inline_xattr_addr(page);
}
memcpy(txattr_addr, inline_addr, inline_size);
/*
 * page is still NULL here when the caller supplied ipage.
 * NOTE(review): presumably f2fs_put_page() tolerates NULL - confirm.
 */
f2fs_put_page(page, 1);
*xe = __find_inline_xattr(txattr_addr, &last_addr,
index, len, name);
/* non-NULL: the inline walk finished inside the inline area */
if (*xe)
goto check;
}
/* read from xattr node block */
if (xnid) {
struct page *xpage;
void *xattr_addr;
/* The inode already has an extended attribute block. */
xpage = get_node_page(sbi, xnid);
if (IS_ERR(xpage)) {
err = PTR_ERR(xpage);
goto out;
}
xattr_addr = page_address(xpage);
/* node block data is placed right behind the inline data */
memcpy(txattr_addr + inline_size, xattr_addr, size);
f2fs_put_page(xpage, 1);
}
/*
 * Resume where the inline walk stopped, if it recorded a position;
 * otherwise search from the start of the buffer.
 * NOTE(review): the "- 1" presumably offsets the header skip that
 * __find_xattr() applies to its base address - confirm against the
 * XATTR_FIRST_ENTRY definition.
 */
if (last_addr)
cur_addr = XATTR_HDR(last_addr) - 1;
else
cur_addr = txattr_addr;
*xe = __find_xattr(cur_addr, index, len, name);
check:
/* landing on the terminating entry means the name was not found */
if (IS_XATTR_LAST_ENTRY(*xe)) {
err = -ENODATA;
goto out;
}
/* success: the buffer is handed to the caller and not freed here */
*base_addr = txattr_addr;
return 0;
out:
kzfree(txattr_addr);
return err;
}
static int read_all_xattrs(struct inode *inode, struct page *ipage,
void **base_addr)
{
......@@ -348,23 +454,20 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
}
xattr_addr = page_address(xpage);
memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE -
sizeof(struct node_footer));
memcpy(xattr_addr, txattr_addr + inline_size, MAX_XATTR_BLOCK_SIZE);
set_page_dirty(xpage);
f2fs_put_page(xpage, 1);
/* need to checkpoint during fsync */
F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
return 0;
}
int f2fs_getxattr(struct inode *inode, int index, const char *name,
void *buffer, size_t buffer_size, struct page *ipage)
{
struct f2fs_xattr_entry *entry;
void *base_addr;
struct f2fs_xattr_entry *entry = NULL;
int error = 0;
size_t size, len;
unsigned int size, len;
void *base_addr = NULL;
if (name == NULL)
return -EINVAL;
......@@ -373,21 +476,16 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
if (len > F2FS_NAME_LEN)
return -ERANGE;
error = read_all_xattrs(inode, ipage, &base_addr);
error = lookup_all_xattrs(inode, ipage, index, len, name,
&entry, &base_addr);
if (error)
return error;
entry = __find_xattr(base_addr, index, len, name);
if (IS_XATTR_LAST_ENTRY(entry)) {
error = -ENODATA;
goto cleanup;
}
size = le16_to_cpu(entry->e_value_size);
if (buffer && size > buffer_size) {
error = -ERANGE;
goto cleanup;
goto out;
}
if (buffer) {
......@@ -395,8 +493,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
memcpy(buffer, pval, size);
}
error = size;
cleanup:
out:
kzfree(base_addr);
return error;
}
......@@ -445,6 +542,13 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
return error;
}
/*
 * Check whether an existing xattr entry already holds the given value.
 *
 * @entry: existing xattr entry
 * @value: new value bytes
 * @size:  length of @value in bytes (CPU byte order)
 *
 * Returns true when the stored value has the same length and the same
 * bytes as @value, false otherwise.
 */
static bool f2fs_xattr_value_same(struct f2fs_xattr_entry *entry,
					const void *value, size_t size)
{
	/* the value is located e_name_len bytes past the start of e_name */
	void *pval = entry->e_name + entry->e_name_len;

	/*
	 * Convert e_value_size with le16_to_cpu() before comparing it to
	 * the CPU-endian @size; a raw comparison is wrong on big-endian
	 * hosts.
	 */
	if (le16_to_cpu(entry->e_value_size) != size)
		return false;

	return !memcmp(pval, value, size);
}
static int __f2fs_setxattr(struct inode *inode, int index,
const char *name, const void *value, size_t size,
struct page *ipage, int flags)
......@@ -479,12 +583,17 @@ static int __f2fs_setxattr(struct inode *inode, int index,
found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;
if ((flags & XATTR_REPLACE) && !found) {
if (found) {
if ((flags & XATTR_CREATE)) {
error = -EEXIST;
goto exit;
}
if (f2fs_xattr_value_same(here, value, size))
goto exit;
} else if ((flags & XATTR_REPLACE)) {
error = -ENODATA;
goto exit;
} else if ((flags & XATTR_CREATE) && found) {
error = -EEXIST;
goto exit;
}
last = here;
......
......@@ -72,9 +72,10 @@ struct f2fs_xattr_entry {
for (entry = XATTR_FIRST_ENTRY(addr);\
!IS_XATTR_LAST_ENTRY(entry);\
entry = XATTR_NEXT_ENTRY(entry))
#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE - \
sizeof(struct node_footer) - sizeof(__u32))
#define MAX_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer))
#define VALID_XATTR_BLOCK_SIZE (MAX_XATTR_BLOCK_SIZE - sizeof(__u32))
#define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \
VALID_XATTR_BLOCK_SIZE)
#define MAX_VALUE_LEN(i) (MIN_OFFSET(i) - \
sizeof(struct f2fs_xattr_header) - \
......
......@@ -36,6 +36,12 @@
#define F2FS_NODE_INO(sbi) (sbi->node_ino_num)
#define F2FS_META_INO(sbi) (sbi->meta_ino_num)
#define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */
#define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */
#define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */
#define F2FS_IO_SIZE_BITS(sbi) ((sbi)->write_io_size_bits) /* power of 2 */
#define F2FS_IO_SIZE_MASK(sbi) (F2FS_IO_SIZE(sbi) - 1)
/* This flag is used by node and meta inodes, and by recovery */
#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO)
#define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM)
......@@ -108,6 +114,7 @@ struct f2fs_super_block {
/*
* For checkpoint
*/
#define CP_NAT_BITS_FLAG 0x00000080
#define CP_CRC_RECOVERY_FLAG 0x00000040
#define CP_FASTBOOT_FLAG 0x00000020
#define CP_FSCK_FLAG 0x00000010
......@@ -272,6 +279,7 @@ struct f2fs_node {
* For NAT entries
*/
#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
#define NAT_ENTRY_BITMAP_SIZE ((NAT_ENTRY_PER_BLOCK + 7) / 8)
struct f2fs_nat_entry {
__u8 version; /* latest version of cached nat entry */
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment