Commit 4c12ab7e authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-f2fs-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "The major work includes fixing and enhancing the existing extent_cache
  feature, which has been well settling down so far and now it becomes a
  default mount option accordingly.

  Also, this version newly registers a f2fs memory shrinker to reclaim
  several objects consumed by a couple of data structures in order to
  avoid memory pressures.

  Another new feature is to add ioctl(F2FS_GARBAGE_COLLECT) which
  triggers a cleaning job explicitly by users.

  Most of the other patches are to fix bugs occurred in the corner cases
  across the whole code area"

* tag 'for-f2fs-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (85 commits)
  f2fs: upset segment_info repair
  f2fs: avoid accessing NULL pointer in f2fs_drop_largest_extent
  f2fs: update extent tree in batches
  f2fs: fix to release inode correctly
  f2fs: handle f2fs_truncate error correctly
  f2fs: avoid unneeded initializing when converting inline dentry
  f2fs: atomically set inode->i_flags
  f2fs: fix wrong pointer access during try_to_free_nids
  f2fs: use __GFP_NOFAIL to avoid infinite loop
  f2fs: lookup neighbor extent nodes for merging later
  f2fs: split __insert_extent_tree_ret for readability
  f2fs: kill dead code in __insert_extent_tree
  f2fs: adjust showing of extent cache stat
  f2fs: add largest/cached stat in extent cache
  f2fs: fix incorrect mapping for bmap
  f2fs: add annotation for space utilization of regular/inline dentry
  f2fs: fix to update cached_en of extent tree properly
  f2fs: fix typo
  f2fs: check the node block address of newly allocated nid
  f2fs: go out for insert_inode_locked failure
  ...
parents 9cbf22b3 01a5ad82
......@@ -143,7 +143,9 @@ fastboot This option is used when a system wants to reduce mount
extent_cache Enable an extent cache based on rb-tree, it can cache
as many as extent which map between contiguous logical
address and physical address per inode, resulting in
increasing the cache hit ratio.
increasing the cache hit ratio. Set by default.
noextent_cache Diable an extent cache based on rb-tree explicitly, see
the above extent_cache mount option.
noinline_data Disable the inline data feature, inline data feature is
enabled by default.
......
......@@ -4416,6 +4416,7 @@ F: include/linux/fscache*.h
F2FS FILE SYSTEM
M: Jaegeuk Kim <jaegeuk@kernel.org>
M: Changman Lee <cm224.lee@samsung.com>
R: Chao Yu <chao2.yu@samsung.com>
L: linux-f2fs-devel@lists.sourceforge.net
W: http://en.wikipedia.org/wiki/F2FS
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
......@@ -4424,6 +4425,7 @@ F: Documentation/filesystems/f2fs.txt
F: Documentation/ABI/testing/sysfs-fs-f2fs
F: fs/f2fs/
F: include/linux/f2fs_fs.h
F: include/trace/events/f2fs.h
FUJITSU FR-V (FRV) PORT
M: David Howells <dhowells@redhat.com>
......
......@@ -45,7 +45,7 @@ config F2FS_FS_POSIX_ACL
default y
help
Posix Access Control Lists (ACLs) support permissions for users and
gourps beyond the owner/group/world scheme.
groups beyond the owner/group/world scheme.
To learn more about Access Control Lists, visit the POSIX ACLs for
Linux website <http://acl.bestbits.at/>.
......
......@@ -2,6 +2,7 @@ obj-$(CONFIG_F2FS_FS) += f2fs.o
f2fs-y := dir.o file.o inode.o namei.o hash.o super.o inline.o
f2fs-y += checkpoint.o gc.o data.o node.o segment.o recovery.o
f2fs-y += shrinker.o extent_cache.o
f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
......
......@@ -69,14 +69,24 @@ struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
fio.page = page;
if (f2fs_submit_page_bio(&fio))
if (f2fs_submit_page_bio(&fio)) {
f2fs_put_page(page, 1);
goto repeat;
}
lock_page(page);
if (unlikely(page->mapping != mapping)) {
f2fs_put_page(page, 1);
goto repeat;
}
/*
* if there is any IO error when accessing device, make our filesystem
* readonly and make sure do not write checkpoint with non-uptodate
* meta page.
*/
if (unlikely(!PageUptodate(page)))
f2fs_stop_checkpoint(sbi);
out:
return page;
}
......@@ -326,26 +336,18 @@ const struct address_space_operations f2fs_meta_aops = {
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
struct inode_management *im = &sbi->im[type];
struct ino_entry *e;
struct ino_entry *e, *tmp;
tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
retry:
if (radix_tree_preload(GFP_NOFS)) {
cond_resched();
goto retry;
}
radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
spin_lock(&im->ino_lock);
e = radix_tree_lookup(&im->ino_root, ino);
if (!e) {
e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
if (!e) {
spin_unlock(&im->ino_lock);
radix_tree_preload_end();
goto retry;
}
e = tmp;
if (radix_tree_insert(&im->ino_root, ino, e)) {
spin_unlock(&im->ino_lock);
kmem_cache_free(ino_entry_slab, e);
radix_tree_preload_end();
goto retry;
}
......@@ -358,6 +360,9 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
}
spin_unlock(&im->ino_lock);
radix_tree_preload_end();
if (e != tmp)
kmem_cache_free(ino_entry_slab, tmp);
}
static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
......@@ -458,24 +463,34 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
__remove_ino_entry(sbi, ino, ORPHAN_INO);
}
static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
struct inode *inode = f2fs_iget(sbi->sb, ino);
f2fs_bug_on(sbi, IS_ERR(inode));
struct inode *inode;
inode = f2fs_iget(sbi->sb, ino);
if (IS_ERR(inode)) {
/*
* there should be a bug that we can't find the entry
* to orphan inode.
*/
f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
return PTR_ERR(inode);
}
clear_nlink(inode);
/* truncate all the data during iput */
iput(inode);
return 0;
}
void recover_orphan_inodes(struct f2fs_sb_info *sbi)
int recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
block_t start_blk, orphan_blocks, i, j;
int err;
if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
return;
set_sbi_flag(sbi, SBI_POR_DOING);
return 0;
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
......@@ -489,14 +504,17 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
orphan_blk = (struct f2fs_orphan_block *)page_address(page);
for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
recover_orphan_inode(sbi, ino);
err = recover_orphan_inode(sbi, ino);
if (err) {
f2fs_put_page(page, 1);
return err;
}
}
f2fs_put_page(page, 1);
}
/* clear Orphan Flag */
clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
clear_sbi_flag(sbi, SBI_POR_DOING);
return;
return 0;
}
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
......@@ -504,7 +522,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
struct list_head *head;
struct f2fs_orphan_block *orphan_blk = NULL;
unsigned int nentries = 0;
unsigned short index;
unsigned short index = 1;
unsigned short orphan_blocks;
struct page *page = NULL;
struct ino_entry *orphan = NULL;
......@@ -512,11 +530,6 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);
for (index = 0; index < orphan_blocks; index++)
grab_meta_page(sbi, start_blk + index);
index = 1;
/*
* we don't need to do spin_lock(&im->ino_lock) here, since all the
* orphan inode operations are covered under f2fs_lock_op().
......@@ -527,12 +540,10 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
/* loop for each orphan inode entry and write them in Jornal block */
list_for_each_entry(orphan, head, list) {
if (!page) {
page = find_get_page(META_MAPPING(sbi), start_blk++);
f2fs_bug_on(sbi, !page);
page = grab_meta_page(sbi, start_blk++);
orphan_blk =
(struct f2fs_orphan_block *)page_address(page);
memset(orphan_blk, 0, sizeof(*orphan_blk));
f2fs_put_page(page, 0);
}
orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
......@@ -704,7 +715,8 @@ void update_dirty_page(struct inode *inode, struct page *page)
struct inode_entry *new;
int ret = 0;
if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
!S_ISLNK(inode->i_mode))
return;
if (!S_ISDIR(inode->i_mode)) {
......@@ -892,12 +904,15 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
__u32 crc32 = 0;
int i;
int cp_payload_blks = __cp_payload(sbi);
block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg);
bool invalidate = false;
/*
* This avoids to conduct wrong roll-forward operations and uses
* metapages, so should be called prior to sync_meta_pages below.
*/
discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
if (discard_next_dnode(sbi, discard_blk))
invalidate = true;
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META)) {
......@@ -1026,6 +1041,14 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
/* wait for previous submitted meta pages writeback */
wait_on_all_pages_writeback(sbi);
/*
* invalidate meta page which is used temporarily for zeroing out
* block at the end of warm node chain.
*/
if (invalidate)
invalidate_mapping_pages(META_MAPPING(sbi), discard_blk,
discard_blk);
release_dirty_inode(sbi);
if (unlikely(f2fs_cp_error(sbi)))
......
......@@ -92,7 +92,6 @@ static void f2fs_free_crypt_info(struct f2fs_crypt_info *ci)
if (!ci)
return;
if (ci->ci_keyring_key)
key_put(ci->ci_keyring_key);
crypto_free_ablkcipher(ci->ci_ctfm);
kmem_cache_free(f2fs_crypt_info_cachep, ci);
......
......@@ -14,6 +14,7 @@
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/prefetch.h>
......@@ -26,9 +27,6 @@
#include "trace.h"
#include <trace/events/f2fs.h>
static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;
static void f2fs_read_end_io(struct bio *bio)
{
struct bio_vec *bvec;
......@@ -92,8 +90,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
{
struct bio *bio;
/* No failure on bio allocation */
bio = bio_alloc(GFP_NOIO, npages);
bio = f2fs_bio_alloc(npages);
bio->bi_bdev = sbi->sb->s_bdev;
bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
......@@ -158,7 +155,6 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
bio_put(bio);
f2fs_put_page(page, 1);
return -EFAULT;
}
......@@ -266,645 +262,17 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
return err;
}
static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs,
struct extent_info *ei)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
pgoff_t start_fofs, end_fofs;
block_t start_blkaddr;
read_lock(&fi->ext_lock);
if (fi->ext.len == 0) {
read_unlock(&fi->ext_lock);
return false;
}
stat_inc_total_hit(inode->i_sb);
start_fofs = fi->ext.fofs;
end_fofs = fi->ext.fofs + fi->ext.len - 1;
start_blkaddr = fi->ext.blk;
if (pgofs >= start_fofs && pgofs <= end_fofs) {
*ei = fi->ext;
stat_inc_read_hit(inode->i_sb);
read_unlock(&fi->ext_lock);
return true;
}
read_unlock(&fi->ext_lock);
return false;
}
static bool update_extent_info(struct inode *inode, pgoff_t fofs,
block_t blkaddr)
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
pgoff_t start_fofs, end_fofs;
block_t start_blkaddr, end_blkaddr;
int need_update = true;
write_lock(&fi->ext_lock);
start_fofs = fi->ext.fofs;
end_fofs = fi->ext.fofs + fi->ext.len - 1;
start_blkaddr = fi->ext.blk;
end_blkaddr = fi->ext.blk + fi->ext.len - 1;
/* Drop and initialize the matched extent */
if (fi->ext.len == 1 && fofs == start_fofs)
fi->ext.len = 0;
/* Initial extent */
if (fi->ext.len == 0) {
if (blkaddr != NULL_ADDR) {
fi->ext.fofs = fofs;
fi->ext.blk = blkaddr;
fi->ext.len = 1;
}
goto end_update;
}
/* Front merge */
if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) {
fi->ext.fofs--;
fi->ext.blk--;
fi->ext.len++;
goto end_update;
}
/* Back merge */
if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) {
fi->ext.len++;
goto end_update;
}
/* Split the existing extent */
if (fi->ext.len > 1 &&
fofs >= start_fofs && fofs <= end_fofs) {
if ((end_fofs - fofs) < (fi->ext.len >> 1)) {
fi->ext.len = fofs - start_fofs;
} else {
fi->ext.fofs = fofs + 1;
fi->ext.blk = start_blkaddr + fofs - start_fofs + 1;
fi->ext.len -= fofs - start_fofs + 1;
}
} else {
need_update = false;
}
/* Finally, if the extent is very fragmented, let's drop the cache. */
if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
fi->ext.len = 0;
set_inode_flag(fi, FI_NO_EXTENT);
need_update = true;
}
end_update:
write_unlock(&fi->ext_lock);
return need_update;
}
static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei,
struct rb_node *parent, struct rb_node **p)
{
struct extent_node *en;
en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
if (!en)
return NULL;
en->ei = *ei;
INIT_LIST_HEAD(&en->list);
rb_link_node(&en->rb_node, parent, p);
rb_insert_color(&en->rb_node, &et->root);
et->count++;
atomic_inc(&sbi->total_ext_node);
return en;
}
static void __detach_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_node *en)
{
rb_erase(&en->rb_node, &et->root);
et->count--;
atomic_dec(&sbi->total_ext_node);
if (et->cached_en == en)
et->cached_en = NULL;
}
static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi,
nid_t ino)
{
struct extent_tree *et;
down_read(&sbi->extent_tree_lock);
et = radix_tree_lookup(&sbi->extent_tree_root, ino);
if (!et) {
up_read(&sbi->extent_tree_lock);
return NULL;
}
atomic_inc(&et->refcount);
up_read(&sbi->extent_tree_lock);
return et;
}
static struct extent_tree *__grab_extent_tree(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
nid_t ino = inode->i_ino;
down_write(&sbi->extent_tree_lock);
et = radix_tree_lookup(&sbi->extent_tree_root, ino);
if (!et) {
et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
memset(et, 0, sizeof(struct extent_tree));
et->ino = ino;
et->root = RB_ROOT;
et->cached_en = NULL;
rwlock_init(&et->lock);
atomic_set(&et->refcount, 0);
et->count = 0;
sbi->total_ext_tree++;
}
atomic_inc(&et->refcount);
up_write(&sbi->extent_tree_lock);
return et;
}
static struct extent_node *__lookup_extent_tree(struct extent_tree *et,
unsigned int fofs)
{
struct rb_node *node = et->root.rb_node;
struct extent_node *en;
if (et->cached_en) {
struct extent_info *cei = &et->cached_en->ei;
if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
return et->cached_en;
}
while (node) {
en = rb_entry(node, struct extent_node, rb_node);
if (fofs < en->ei.fofs) {
node = node->rb_left;
} else if (fofs >= en->ei.fofs + en->ei.len) {
node = node->rb_right;
} else {
et->cached_en = en;
return en;
}
}
return NULL;
}
static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_node *en)
{
struct extent_node *prev;
struct rb_node *node;
node = rb_prev(&en->rb_node);
if (!node)
return NULL;
prev = rb_entry(node, struct extent_node, rb_node);
if (__is_back_mergeable(&en->ei, &prev->ei)) {
en->ei.fofs = prev->ei.fofs;
en->ei.blk = prev->ei.blk;
en->ei.len += prev->ei.len;
__detach_extent_node(sbi, et, prev);
return prev;
}
return NULL;
}
static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_node *en)
{
struct extent_node *next;
struct rb_node *node;
node = rb_next(&en->rb_node);
if (!node)
return NULL;
next = rb_entry(node, struct extent_node, rb_node);
if (__is_front_mergeable(&en->ei, &next->ei)) {
en->ei.len += next->ei.len;
__detach_extent_node(sbi, et, next);
return next;
}
return NULL;
}
static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei,
struct extent_node **den)
{
struct rb_node **p = &et->root.rb_node;
struct rb_node *parent = NULL;
struct extent_node *en;
while (*p) {
parent = *p;
en = rb_entry(parent, struct extent_node, rb_node);
if (ei->fofs < en->ei.fofs) {
if (__is_front_mergeable(ei, &en->ei)) {
f2fs_bug_on(sbi, !den);
en->ei.fofs = ei->fofs;
en->ei.blk = ei->blk;
en->ei.len += ei->len;
*den = __try_back_merge(sbi, et, en);
return en;
}
p = &(*p)->rb_left;
} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
if (__is_back_mergeable(ei, &en->ei)) {
f2fs_bug_on(sbi, !den);
en->ei.len += ei->len;
*den = __try_front_merge(sbi, et, en);
return en;
}
p = &(*p)->rb_right;
} else {
f2fs_bug_on(sbi, 1);
}
}
return __attach_extent_node(sbi, et, ei, parent, p);
}
static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, bool free_all)
{
struct rb_node *node, *next;
struct extent_node *en;
unsigned int count = et->count;
node = rb_first(&et->root);
while (node) {
next = rb_next(node);
en = rb_entry(node, struct extent_node, rb_node);
if (free_all) {
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list))
list_del_init(&en->list);
spin_unlock(&sbi->extent_lock);
}
if (free_all || list_empty(&en->list)) {
__detach_extent_node(sbi, et, en);
kmem_cache_free(extent_node_slab, en);
}
node = next;
}
return count - et->count;
}
static void f2fs_init_extent_tree(struct inode *inode,
struct f2fs_extent *i_ext)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
struct extent_node *en;
struct extent_info ei;
struct inode *inode = dn->inode;
if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
return;
et = __grab_extent_tree(inode);
write_lock(&et->lock);
if (et->count)
goto out;
set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
en = __insert_extent_tree(sbi, et, &ei, NULL);
if (en) {
et->cached_en = en;
spin_lock(&sbi->extent_lock);
list_add_tail(&en->list, &sbi->extent_list);
spin_unlock(&sbi->extent_lock);
}
out:
write_unlock(&et->lock);
atomic_dec(&et->refcount);
}
static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
struct extent_info *ei)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
struct extent_node *en;
trace_f2fs_lookup_extent_tree_start(inode, pgofs);
et = __find_extent_tree(sbi, inode->i_ino);
if (!et)
return false;
read_lock(&et->lock);
en = __lookup_extent_tree(et, pgofs);
if (en) {
*ei = en->ei;
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list))
list_move_tail(&en->list, &sbi->extent_list);
spin_unlock(&sbi->extent_lock);
stat_inc_read_hit(sbi->sb);
}
stat_inc_total_hit(sbi->sb);
read_unlock(&et->lock);
trace_f2fs_lookup_extent_tree_end(inode, pgofs, en);
atomic_dec(&et->refcount);
return en ? true : false;
}
static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
block_t blkaddr)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
struct extent_node *den = NULL;
struct extent_info ei, dei;
unsigned int endofs;
trace_f2fs_update_extent_tree(inode, fofs, blkaddr);
et = __grab_extent_tree(inode);
write_lock(&et->lock);
/* 1. lookup and remove existing extent info in cache */
en = __lookup_extent_tree(et, fofs);
if (!en)
goto update_extent;
dei = en->ei;
__detach_extent_node(sbi, et, en);
/* 2. if extent can be split more, split and insert the left part */
if (dei.len > 1) {
/* insert left part of split extent into cache */
if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
set_extent_info(&ei, dei.fofs, dei.blk,
fofs - dei.fofs);
en1 = __insert_extent_tree(sbi, et, &ei, NULL);
}
/* insert right part of split extent into cache */
endofs = dei.fofs + dei.len - 1;
if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) {
set_extent_info(&ei, fofs + 1,
fofs - dei.fofs + dei.blk, endofs - fofs);
en2 = __insert_extent_tree(sbi, et, &ei, NULL);
}
}
update_extent:
/* 3. update extent in extent cache */
if (blkaddr) {
set_extent_info(&ei, fofs, blkaddr, 1);
en3 = __insert_extent_tree(sbi, et, &ei, &den);
}
/* 4. update in global extent list */
spin_lock(&sbi->extent_lock);
if (en && !list_empty(&en->list))
list_del(&en->list);
/*
* en1 and en2 split from en, they will become more and more smaller
* fragments after splitting several times. So if the length is smaller
* than F2FS_MIN_EXTENT_LEN, we will not add them into extent tree.
*/
if (en1)
list_add_tail(&en1->list, &sbi->extent_list);
if (en2)
list_add_tail(&en2->list, &sbi->extent_list);
if (en3) {
if (list_empty(&en3->list))
list_add_tail(&en3->list, &sbi->extent_list);
else
list_move_tail(&en3->list, &sbi->extent_list);
}
if (den && !list_empty(&den->list))
list_del(&den->list);
spin_unlock(&sbi->extent_lock);
/* 5. release extent node */
if (en)
kmem_cache_free(extent_node_slab, en);
if (den)
kmem_cache_free(extent_node_slab, den);
write_unlock(&et->lock);
atomic_dec(&et->refcount);
}
void f2fs_preserve_extent_tree(struct inode *inode)
{
struct extent_tree *et;
struct extent_info *ext = &F2FS_I(inode)->ext;
bool sync = false;
if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
return;
et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino);
if (!et) {
if (ext->len) {
ext->len = 0;
update_inode_page(inode);
}
return;
}
read_lock(&et->lock);
if (et->count) {
struct extent_node *en;
if (et->cached_en) {
en = et->cached_en;
} else {
struct rb_node *node = rb_first(&et->root);
if (!node)
node = rb_last(&et->root);
en = rb_entry(node, struct extent_node, rb_node);
}
if (__is_extent_same(ext, &en->ei))
goto out;
*ext = en->ei;
sync = true;
} else if (ext->len) {
ext->len = 0;
sync = true;
}
out:
read_unlock(&et->lock);
atomic_dec(&et->refcount);
if (sync)
update_inode_page(inode);
}
void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
struct extent_node *en, *tmp;
unsigned long ino = F2FS_ROOT_INO(sbi);
struct radix_tree_iter iter;
void **slot;
unsigned int found;
unsigned int node_cnt = 0, tree_cnt = 0;
if (!test_opt(sbi, EXTENT_CACHE))
return;
if (available_free_memory(sbi, EXTENT_CACHE))
return;
spin_lock(&sbi->extent_lock);
list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
if (!nr_shrink--)
break;
list_del_init(&en->list);
}
spin_unlock(&sbi->extent_lock);
down_read(&sbi->extent_tree_lock);
while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
unsigned i;
ino = treevec[found - 1]->ino + 1;
for (i = 0; i < found; i++) {
struct extent_tree *et = treevec[i];
atomic_inc(&et->refcount);
write_lock(&et->lock);
node_cnt += __free_extent_tree(sbi, et, false);
write_unlock(&et->lock);
atomic_dec(&et->refcount);
}
}
up_read(&sbi->extent_tree_lock);
down_write(&sbi->extent_tree_lock);
radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
F2FS_ROOT_INO(sbi)) {
struct extent_tree *et = (struct extent_tree *)*slot;
if (!atomic_read(&et->refcount) && !et->count) {
radix_tree_delete(&sbi->extent_tree_root, et->ino);
kmem_cache_free(extent_tree_slab, et);
sbi->total_ext_tree--;
tree_cnt++;
}
}
up_write(&sbi->extent_tree_lock);
trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
}
void f2fs_destroy_extent_tree(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
unsigned int node_cnt = 0;
if (!test_opt(sbi, EXTENT_CACHE))
return;
et = __find_extent_tree(sbi, inode->i_ino);
if (!et)
goto out;
/* free all extent info belong to this extent tree */
write_lock(&et->lock);
node_cnt = __free_extent_tree(sbi, et, true);
write_unlock(&et->lock);
atomic_dec(&et->refcount);
/* try to find and delete extent tree entry in radix tree */
down_write(&sbi->extent_tree_lock);
et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
if (!et) {
up_write(&sbi->extent_tree_lock);
goto out;
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
dn->data_blkaddr = ei.blk + index - ei.fofs;
return 0;
}
f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
kmem_cache_free(extent_tree_slab, et);
sbi->total_ext_tree--;
up_write(&sbi->extent_tree_lock);
out:
trace_f2fs_destroy_extent_tree(inode, node_cnt);
return;
}
void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext)
{
if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
f2fs_init_extent_tree(inode, i_ext);
write_lock(&F2FS_I(inode)->ext_lock);
get_extent_info(&F2FS_I(inode)->ext, *i_ext);
write_unlock(&F2FS_I(inode)->ext_lock);
}
static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
struct extent_info *ei)
{
if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
return false;
if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
return f2fs_lookup_extent_tree(inode, pgofs, ei);
return lookup_extent_info(inode, pgofs, ei);
}
void f2fs_update_extent_cache(struct dnode_of_data *dn)
{
struct f2fs_inode_info *fi = F2FS_I(dn->inode);
pgoff_t fofs;
f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
if (is_inode_flag_set(fi, FI_NO_EXTENT))
return;
fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
dn->ofs_in_node;
if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE))
return f2fs_update_extent_tree(dn->inode, fofs,
dn->data_blkaddr);
if (update_extent_info(dn->inode, fofs, dn->data_blkaddr))
sync_inode_page(dn);
return f2fs_reserve_block(dn, index);
}
struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw)
......@@ -935,15 +303,13 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw)
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err) {
f2fs_put_page(page, 1);
return ERR_PTR(err);
}
if (err)
goto put_err;
f2fs_put_dnode(&dn);
if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
f2fs_put_page(page, 1);
return ERR_PTR(-ENOENT);
err = -ENOENT;
goto put_err;
}
got_it:
if (PageUptodate(page)) {
......@@ -968,8 +334,12 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw)
fio.page = page;
err = f2fs_submit_page_bio(&fio);
if (err)
return ERR_PTR(err);
goto put_err;
return page;
put_err:
f2fs_put_page(page, 1);
return ERR_PTR(err);
}
struct page *find_data_page(struct inode *inode, pgoff_t index)
......@@ -1030,7 +400,8 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
*
* Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
* f2fs_unlock_op().
* Note that, ipage is set only by make_empty_dir.
* Note that, ipage is set only by make_empty_dir, and if any error occur,
* ipage should be released by this function.
*/
struct page *get_new_data_page(struct inode *inode,
struct page *ipage, pgoff_t index, bool new_i_size)
......@@ -1041,8 +412,14 @@ struct page *get_new_data_page(struct inode *inode,
int err;
repeat:
page = grab_cache_page(mapping, index);
if (!page)
if (!page) {
/*
* before exiting, we should make sure ipage will be released
* if any error occur.
*/
f2fs_put_page(ipage, 1);
return ERR_PTR(-ENOMEM);
}
set_new_dnode(&dn, inode, ipage, NULL, 0);
err = f2fs_reserve_block(&dn, index);
......@@ -1107,8 +484,6 @@ static int __allocate_data_block(struct dnode_of_data *dn)
allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
&sum, seg);
/* direct IO doesn't use extent cache to maximize the performance */
set_data_blkaddr(dn);
/* update i_size */
......@@ -1117,6 +492,9 @@ static int __allocate_data_block(struct dnode_of_data *dn)
if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT))
i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT));
/* direct IO doesn't use extent cache to maximize the performance */
f2fs_drop_largest_extent(dn->inode, fofs);
return 0;
}
......@@ -1183,7 +561,7 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
* c. give the block addresses to blockdev
*/
static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
int create, bool fiemap)
int create, int flag)
{
unsigned int maxblocks = map->m_len;
struct dnode_of_data dn;
......@@ -1217,8 +595,19 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
err = 0;
goto unlock_out;
}
if (dn.data_blkaddr == NEW_ADDR && !fiemap)
if (dn.data_blkaddr == NEW_ADDR) {
if (flag == F2FS_GET_BLOCK_BMAP) {
err = -ENOENT;
goto put_out;
} else if (flag == F2FS_GET_BLOCK_READ ||
flag == F2FS_GET_BLOCK_DIO) {
goto put_out;
}
/*
* if it is in fiemap call path (flag = F2FS_GET_BLOCK_FIEMAP),
* mark it as mapped and unwritten block.
*/
}
if (dn.data_blkaddr != NULL_ADDR) {
map->m_flags = F2FS_MAP_MAPPED;
......@@ -1233,6 +622,8 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
map->m_flags = F2FS_MAP_NEW | F2FS_MAP_MAPPED;
map->m_pblk = dn.data_blkaddr;
} else {
if (flag == F2FS_GET_BLOCK_BMAP)
err = -ENOENT;
goto put_out;
}
......@@ -1255,7 +646,9 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
err = 0;
goto unlock_out;
}
if (dn.data_blkaddr == NEW_ADDR && !fiemap)
if (dn.data_blkaddr == NEW_ADDR &&
flag != F2FS_GET_BLOCK_FIEMAP)
goto put_out;
end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
......@@ -1297,7 +690,7 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
}
static int __get_data_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create, bool fiemap)
struct buffer_head *bh, int create, int flag)
{
struct f2fs_map_blocks map;
int ret;
......@@ -1305,7 +698,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
map.m_lblk = iblock;
map.m_len = bh->b_size >> inode->i_blkbits;
ret = f2fs_map_blocks(inode, &map, create, fiemap);
ret = f2fs_map_blocks(inode, &map, create, flag);
if (!ret) {
map_bh(bh, inode->i_sb, map.m_pblk);
bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
......@@ -1315,15 +708,23 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
}
static int get_data_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create, int flag)
{
return __get_data_block(inode, iblock, bh_result, create, flag);
}
static int get_data_block_dio(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
return __get_data_block(inode, iblock, bh_result, create, false);
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_DIO);
}
static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
return __get_data_block(inode, iblock, bh_result, create, true);
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_BMAP);
}
static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
......@@ -1367,7 +768,8 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
memset(&map_bh, 0, sizeof(struct buffer_head));
map_bh.b_size = len;
ret = get_data_block_fiemap(inode, start_blk, &map_bh, 0);
ret = get_data_block(inode, start_blk, &map_bh, 0,
F2FS_GET_BLOCK_FIEMAP);
if (ret)
goto out;
......@@ -1770,6 +1172,137 @@ static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
return ret;
}
/*
* This function was copied from write_cche_pages from mm/page-writeback.c.
* The major change is making write step of cold data page separately from
* warm/hot data page.
*/
static int f2fs_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc, writepage_t writepage,
void *data)
{
int ret = 0;
int done = 0;
struct pagevec pvec;
int nr_pages;
pgoff_t uninitialized_var(writeback_index);
pgoff_t index;
pgoff_t end; /* Inclusive */
pgoff_t done_index;
int cycled;
int range_whole = 0;
int tag;
int step = 0;
pagevec_init(&pvec, 0);
next:
if (wbc->range_cyclic) {
writeback_index = mapping->writeback_index; /* prev offset */
index = writeback_index;
if (index == 0)
cycled = 1;
else
cycled = 0;
end = -1;
} else {
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
cycled = 1; /* ignore range_cyclic tests */
}
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag = PAGECACHE_TAG_TOWRITE;
else
tag = PAGECACHE_TAG_DIRTY;
retry:
if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
tag_pages_for_writeback(mapping, index, end);
done_index = index;
while (!done && (index <= end)) {
int i;
nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
if (page->index > end) {
done = 1;
break;
}
done_index = page->index;
lock_page(page);
if (unlikely(page->mapping != mapping)) {
continue_unlock:
unlock_page(page);
continue;
}
if (!PageDirty(page)) {
/* someone wrote it for us */
goto continue_unlock;
}
if (step == is_cold_data(page))
goto continue_unlock;
if (PageWriteback(page)) {
if (wbc->sync_mode != WB_SYNC_NONE)
f2fs_wait_on_page_writeback(page, DATA);
else
goto continue_unlock;
}
BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
ret = (*writepage)(page, wbc, data);
if (unlikely(ret)) {
if (ret == AOP_WRITEPAGE_ACTIVATE) {
unlock_page(page);
ret = 0;
} else {
done_index = page->index + 1;
done = 1;
break;
}
}
if (--wbc->nr_to_write <= 0 &&
wbc->sync_mode == WB_SYNC_NONE) {
done = 1;
break;
}
}
pagevec_release(&pvec);
cond_resched();
}
if (step < 1) {
step++;
goto next;
}
if (!cycled && !done) {
cycled = 1;
index = 0;
end = writeback_index - 1;
goto retry;
}
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = done_index;
return ret;
}
static int f2fs_write_data_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
......@@ -1785,6 +1318,10 @@ static int f2fs_write_data_pages(struct address_space *mapping,
if (!mapping->a_ops->writepage)
return 0;
/* skip writing if there is no dirty page in this inode */
if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
return 0;
if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
available_free_memory(sbi, DIRTY_DENTS))
......@@ -1800,12 +1337,11 @@ static int f2fs_write_data_pages(struct address_space *mapping,
mutex_lock(&sbi->writepages);
locked = true;
}
ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
f2fs_submit_merged_bio(sbi, DATA, WRITE);
if (locked)
mutex_unlock(&sbi->writepages);
f2fs_submit_merged_bio(sbi, DATA, WRITE);
remove_dirty_dir_inode(inode);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
......@@ -1832,7 +1368,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
{
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *page, *ipage;
struct page *page = NULL;
struct page *ipage;
pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
struct dnode_of_data dn;
int err = 0;
......@@ -1882,25 +1419,28 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
if (err)
goto put_fail;
}
err = f2fs_reserve_block(&dn, index);
err = f2fs_get_block(&dn, index);
if (err)
goto put_fail;
put_next:
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
return 0;
f2fs_wait_on_page_writeback(page, DATA);
if (len == PAGE_CACHE_SIZE)
goto out_update;
if (PageUptodate(page))
goto out_clear;
if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
unsigned start = pos & (PAGE_CACHE_SIZE - 1);
unsigned end = start + len;
/* Reading beyond i_size is simple: memset to zero */
zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
goto out;
goto out_update;
}
if (dn.data_blkaddr == NEW_ADDR) {
......@@ -1920,7 +1460,6 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
lock_page(page);
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
err = -EIO;
goto fail;
}
......@@ -1932,14 +1471,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
/* avoid symlink page */
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
err = f2fs_decrypt_one(inode, page);
if (err) {
f2fs_put_page(page, 1);
if (err)
goto fail;
}
}
}
out:
out_update:
SetPageUptodate(page);
out_clear:
clear_cold_data(page);
return 0;
......@@ -1947,8 +1485,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
f2fs_put_dnode(&dn);
unlock_fail:
f2fs_unlock_op(sbi);
f2fs_put_page(page, 1);
fail:
f2fs_put_page(page, 1);
f2fs_write_failed(mapping, pos + len);
return err;
}
......@@ -1979,9 +1517,6 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
{
unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
if (iov_iter_rw(iter) == READ)
return 0;
if (offset & blocksize_mask)
return -EINVAL;
......@@ -2010,15 +1545,16 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
return 0;
if (check_direct_IO(inode, iter, offset))
return 0;
err = check_direct_IO(inode, iter, offset);
if (err)
return err;
trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
if (iov_iter_rw(iter) == WRITE)
__allocate_data_blocks(inode, offset, count);
err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block);
err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
if (err < 0 && iov_iter_rw(iter) == WRITE)
f2fs_write_failed(mapping, offset + count);
......@@ -2045,6 +1581,11 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
else
inode_dec_dirty_pages(inode);
}
/* This is atomic written page, keep Private */
if (IS_ATOMIC_WRITTEN_PAGE(page))
return;
ClearPagePrivate(page);
}
......@@ -2054,6 +1595,10 @@ int f2fs_release_page(struct page *page, gfp_t wait)
if (PageDirty(page))
return 0;
/* This is atomic written page, keep Private */
if (IS_ATOMIC_WRITTEN_PAGE(page))
return 0;
ClearPagePrivate(page);
return 1;
}
......@@ -2068,9 +1613,16 @@ static int f2fs_set_data_page_dirty(struct page *page)
SetPageUptodate(page);
if (f2fs_is_atomic_file(inode)) {
if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
register_inmem_page(inode, page);
return 1;
}
/*
* Previously, this page has been registered, we just
* return here.
*/
return 0;
}
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
......@@ -2090,38 +1642,7 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
if (err)
return err;
}
return generic_block_bmap(mapping, block, get_data_block);
}
void init_extent_cache_info(struct f2fs_sb_info *sbi)
{
INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
init_rwsem(&sbi->extent_tree_lock);
INIT_LIST_HEAD(&sbi->extent_list);
spin_lock_init(&sbi->extent_lock);
sbi->total_ext_tree = 0;
atomic_set(&sbi->total_ext_node, 0);
}
int __init create_extent_cache(void)
{
extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
sizeof(struct extent_tree));
if (!extent_tree_slab)
return -ENOMEM;
extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
sizeof(struct extent_node));
if (!extent_node_slab) {
kmem_cache_destroy(extent_tree_slab);
return -ENOMEM;
}
return 0;
}
void destroy_extent_cache(void)
{
kmem_cache_destroy(extent_node_slab);
kmem_cache_destroy(extent_tree_slab);
return generic_block_bmap(mapping, block, get_data_block_bmap);
}
const struct address_space_operations f2fs_dblock_aops = {
......
......@@ -33,8 +33,11 @@ static void update_general_status(struct f2fs_sb_info *sbi)
int i;
/* validation check of the segment numbers */
si->hit_ext = sbi->read_hit_ext;
si->total_ext = sbi->total_hit_ext;
si->hit_largest = atomic_read(&sbi->read_hit_largest);
si->hit_cached = atomic_read(&sbi->read_hit_cached);
si->hit_rbtree = atomic_read(&sbi->read_hit_rbtree);
si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
si->total_ext = atomic_read(&sbi->total_hit_ext);
si->ext_tree = sbi->total_ext_tree;
si->ext_node = atomic_read(&sbi->total_ext_node);
si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
......@@ -49,6 +52,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->valid_count = valid_user_blocks(sbi);
si->valid_node_count = valid_node_count(sbi);
si->valid_inode_count = valid_inode_count(sbi);
si->inline_xattr = atomic_read(&sbi->inline_xattr);
si->inline_inode = atomic_read(&sbi->inline_inode);
si->inline_dir = atomic_read(&sbi->inline_dir);
si->utilization = utilization(sbi);
......@@ -226,6 +230,8 @@ static int stat_show(struct seq_file *s, void *v)
seq_printf(s, "Other: %u)\n - Data: %u\n",
si->valid_node_count - si->valid_inode_count,
si->valid_count - si->valid_node_count);
seq_printf(s, " - Inline_xattr Inode: %u\n",
si->inline_xattr);
seq_printf(s, " - Inline_data Inode: %u\n",
si->inline_inode);
seq_printf(s, " - Inline_dentry Inode: %u\n",
......@@ -276,10 +282,16 @@ static int stat_show(struct seq_file *s, void *v)
si->bg_data_blks);
seq_printf(s, " - node blocks : %d (%d)\n", si->node_blks,
si->bg_node_blks);
seq_printf(s, "\nExtent Hit Ratio: %d / %d\n",
si->hit_ext, si->total_ext);
seq_printf(s, "\nExtent Tree Count: %d\n", si->ext_tree);
seq_printf(s, "\nExtent Node Count: %d\n", si->ext_node);
seq_puts(s, "\nExtent Cache:\n");
seq_printf(s, " - Hit Count: L1-1:%d L1-2:%d L2:%d\n",
si->hit_largest, si->hit_cached,
si->hit_rbtree);
seq_printf(s, " - Hit Ratio: %d%% (%d / %d)\n",
!si->total_ext ? 0 :
(si->hit_total * 100) / si->total_ext,
si->hit_total, si->total_ext);
seq_printf(s, " - Inner Struct Count: tree: %d, node: %d\n",
si->ext_tree, si->ext_node);
seq_puts(s, "\nBalancing F2FS Async:\n");
seq_printf(s, " - inmem: %4d, wb: %4d\n",
si->inmem_pages, si->wb_pages);
......@@ -366,6 +378,12 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
si->sbi = sbi;
sbi->stat_info = si;
atomic_set(&sbi->total_hit_ext, 0);
atomic_set(&sbi->read_hit_rbtree, 0);
atomic_set(&sbi->read_hit_largest, 0);
atomic_set(&sbi->read_hit_cached, 0);
atomic_set(&sbi->inline_xattr, 0);
atomic_set(&sbi->inline_inode, 0);
atomic_set(&sbi->inline_dir, 0);
atomic_set(&sbi->inplace_count, 0);
......
......@@ -718,8 +718,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
if (inode)
f2fs_drop_nlink(dir, inode, NULL);
if (bit_pos == NR_DENTRY_IN_BLOCK) {
truncate_hole(dir, page->index, page->index + 1);
if (bit_pos == NR_DENTRY_IN_BLOCK &&
!truncate_hole(dir, page->index, page->index + 1)) {
clear_page_dirty_for_io(page);
ClearPagePrivate(page);
ClearPageUptodate(page);
......
/*
* f2fs extent cache support
*
* Copyright (c) 2015 Motorola Mobility
* Copyright (c) 2015 Samsung Electronics
* Authors: Jaegeuk Kim <jaegeuk@kernel.org>
* Chao Yu <chao2.yu@samsung.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include <trace/events/f2fs.h>
static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;
static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei,
struct rb_node *parent, struct rb_node **p)
{
struct extent_node *en;
en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
if (!en)
return NULL;
en->ei = *ei;
INIT_LIST_HEAD(&en->list);
rb_link_node(&en->rb_node, parent, p);
rb_insert_color(&en->rb_node, &et->root);
et->count++;
atomic_inc(&sbi->total_ext_node);
return en;
}
static void __detach_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_node *en)
{
rb_erase(&en->rb_node, &et->root);
et->count--;
atomic_dec(&sbi->total_ext_node);
if (et->cached_en == en)
et->cached_en = NULL;
}
static struct extent_tree *__grab_extent_tree(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
nid_t ino = inode->i_ino;
down_write(&sbi->extent_tree_lock);
et = radix_tree_lookup(&sbi->extent_tree_root, ino);
if (!et) {
et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
memset(et, 0, sizeof(struct extent_tree));
et->ino = ino;
et->root = RB_ROOT;
et->cached_en = NULL;
rwlock_init(&et->lock);
atomic_set(&et->refcount, 0);
et->count = 0;
sbi->total_ext_tree++;
}
atomic_inc(&et->refcount);
up_write(&sbi->extent_tree_lock);
/* never died until evict_inode */
F2FS_I(inode)->extent_tree = et;
return et;
}
static struct extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, unsigned int fofs)
{
struct rb_node *node = et->root.rb_node;
struct extent_node *en = et->cached_en;
if (en) {
struct extent_info *cei = &en->ei;
if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) {
stat_inc_cached_node_hit(sbi);
return en;
}
}
while (node) {
en = rb_entry(node, struct extent_node, rb_node);
if (fofs < en->ei.fofs) {
node = node->rb_left;
} else if (fofs >= en->ei.fofs + en->ei.len) {
node = node->rb_right;
} else {
stat_inc_rbtree_node_hit(sbi);
return en;
}
}
return NULL;
}
static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei)
{
struct rb_node **p = &et->root.rb_node;
struct extent_node *en;
en = __attach_extent_node(sbi, et, ei, NULL, p);
if (!en)
return NULL;
et->largest = en->ei;
et->cached_en = en;
return en;
}
static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, bool free_all)
{
struct rb_node *node, *next;
struct extent_node *en;
unsigned int count = et->count;
node = rb_first(&et->root);
while (node) {
next = rb_next(node);
en = rb_entry(node, struct extent_node, rb_node);
if (free_all) {
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list))
list_del_init(&en->list);
spin_unlock(&sbi->extent_lock);
}
if (free_all || list_empty(&en->list)) {
__detach_extent_node(sbi, et, en);
kmem_cache_free(extent_node_slab, en);
}
node = next;
}
return count - et->count;
}
static void __drop_largest_extent(struct inode *inode, pgoff_t fofs)
{
struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest;
if (largest->fofs <= fofs && largest->fofs + largest->len > fofs)
largest->len = 0;
}
void f2fs_drop_largest_extent(struct inode *inode, pgoff_t fofs)
{
if (!f2fs_may_extent_tree(inode))
return;
__drop_largest_extent(inode, fofs);
}
void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et;
struct extent_node *en;
struct extent_info ei;
if (!f2fs_may_extent_tree(inode))
return;
et = __grab_extent_tree(inode);
if (!i_ext || le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
return;
set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
write_lock(&et->lock);
if (et->count)
goto out;
en = __init_extent_tree(sbi, et, &ei);
if (en) {
spin_lock(&sbi->extent_lock);
list_add_tail(&en->list, &sbi->extent_list);
spin_unlock(&sbi->extent_lock);
}
out:
write_unlock(&et->lock);
}
static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
struct extent_info *ei)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
struct extent_node *en;
bool ret = false;
f2fs_bug_on(sbi, !et);
trace_f2fs_lookup_extent_tree_start(inode, pgofs);
read_lock(&et->lock);
if (et->largest.fofs <= pgofs &&
et->largest.fofs + et->largest.len > pgofs) {
*ei = et->largest;
ret = true;
stat_inc_largest_node_hit(sbi);
goto out;
}
en = __lookup_extent_tree(sbi, et, pgofs);
if (en) {
*ei = en->ei;
spin_lock(&sbi->extent_lock);
if (!list_empty(&en->list))
list_move_tail(&en->list, &sbi->extent_list);
et->cached_en = en;
spin_unlock(&sbi->extent_lock);
ret = true;
}
out:
stat_inc_total_hit(sbi);
read_unlock(&et->lock);
trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei);
return ret;
}
/*
* lookup extent at @fofs, if hit, return the extent
* if not, return NULL and
* @prev_ex: extent before fofs
* @next_ex: extent after fofs
* @insert_p: insert point for new extent at fofs
* in order to simpfy the insertion after.
* tree must stay unchanged between lookup and insertion.
*/
static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
unsigned int fofs,
struct extent_node **prev_ex,
struct extent_node **next_ex,
struct rb_node ***insert_p,
struct rb_node **insert_parent)
{
struct rb_node **pnode = &et->root.rb_node;
struct rb_node *parent = NULL, *tmp_node;
struct extent_node *en = et->cached_en;
*insert_p = NULL;
*insert_parent = NULL;
*prev_ex = NULL;
*next_ex = NULL;
if (RB_EMPTY_ROOT(&et->root))
return NULL;
if (en) {
struct extent_info *cei = &en->ei;
if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
goto lookup_neighbors;
}
while (*pnode) {
parent = *pnode;
en = rb_entry(*pnode, struct extent_node, rb_node);
if (fofs < en->ei.fofs)
pnode = &(*pnode)->rb_left;
else if (fofs >= en->ei.fofs + en->ei.len)
pnode = &(*pnode)->rb_right;
else
goto lookup_neighbors;
}
*insert_p = pnode;
*insert_parent = parent;
en = rb_entry(parent, struct extent_node, rb_node);
tmp_node = parent;
if (parent && fofs > en->ei.fofs)
tmp_node = rb_next(parent);
*next_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
tmp_node = parent;
if (parent && fofs < en->ei.fofs)
tmp_node = rb_prev(parent);
*prev_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
return NULL;
lookup_neighbors:
if (fofs == en->ei.fofs) {
/* lookup prev node for merging backward later */
tmp_node = rb_prev(&en->rb_node);
*prev_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
}
if (fofs == en->ei.fofs + en->ei.len - 1) {
/* lookup next node for merging frontward later */
tmp_node = rb_next(&en->rb_node);
*next_ex = tmp_node ?
rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
}
return en;
}
static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei,
struct extent_node **den,
struct extent_node *prev_ex,
struct extent_node *next_ex)
{
struct extent_node *en = NULL;
if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) {
prev_ex->ei.len += ei->len;
ei = &prev_ex->ei;
en = prev_ex;
}
if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
if (en) {
__detach_extent_node(sbi, et, prev_ex);
*den = prev_ex;
}
next_ex->ei.fofs = ei->fofs;
next_ex->ei.blk = ei->blk;
next_ex->ei.len += ei->len;
en = next_ex;
}
if (en) {
if (en->ei.len > et->largest.len)
et->largest = en->ei;
et->cached_en = en;
}
return en;
}
static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei,
struct rb_node **insert_p,
struct rb_node *insert_parent)
{
struct rb_node **p = &et->root.rb_node;
struct rb_node *parent = NULL;
struct extent_node *en = NULL;
if (insert_p && insert_parent) {
parent = insert_parent;
p = insert_p;
goto do_insert;
}
while (*p) {
parent = *p;
en = rb_entry(parent, struct extent_node, rb_node);
if (ei->fofs < en->ei.fofs)
p = &(*p)->rb_left;
else if (ei->fofs >= en->ei.fofs + en->ei.len)
p = &(*p)->rb_right;
else
f2fs_bug_on(sbi, 1);
}
do_insert:
en = __attach_extent_node(sbi, et, ei, parent, p);
if (!en)
return NULL;
if (en->ei.len > et->largest.len)
et->largest = en->ei;
et->cached_en = en;
return en;
}
unsigned int f2fs_update_extent_tree_range(struct inode *inode,
pgoff_t fofs, block_t blkaddr, unsigned int len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
struct extent_node *prev_en = NULL, *next_en = NULL;
struct extent_info ei, dei, prev;
struct rb_node **insert_p = NULL, *insert_parent = NULL;
unsigned int end = fofs + len;
unsigned int pos = (unsigned int)fofs;
if (!et)
return false;
write_lock(&et->lock);
if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) {
write_unlock(&et->lock);
return false;
}
prev = et->largest;
dei.len = 0;
/* we do not guarantee that the largest extent is cached all the time */
__drop_largest_extent(inode, fofs);
/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en,
&insert_p, &insert_parent);
if (!en) {
if (next_en) {
en = next_en;
f2fs_bug_on(sbi, en->ei.fofs <= pos);
pos = en->ei.fofs;
} else {
/*
* skip searching in the tree since there is no
* larger extent node in the cache.
*/
goto update_extent;
}
}
/* 2. invlidate all extent nodes in range [fofs, fofs + len - 1] */
while (en) {
struct rb_node *node;
if (pos >= end)
break;
dei = en->ei;
en1 = en2 = NULL;
node = rb_next(&en->rb_node);
/*
* 2.1 there are four cases when we invalidate blkaddr in extent
* node, |V: valid address, X: will be invalidated|
*/
/* case#1, invalidate right part of extent node |VVVVVXXXXX| */
if (pos > dei.fofs && end >= dei.fofs + dei.len) {
en->ei.len = pos - dei.fofs;
if (en->ei.len < F2FS_MIN_EXTENT_LEN) {
__detach_extent_node(sbi, et, en);
insert_p = NULL;
insert_parent = NULL;
goto update;
}
if (__is_extent_same(&dei, &et->largest))
et->largest = en->ei;
goto next;
}
/* case#2, invalidate left part of extent node |XXXXXVVVVV| */
if (pos <= dei.fofs && end < dei.fofs + dei.len) {
en->ei.fofs = end;
en->ei.blk += end - dei.fofs;
en->ei.len -= end - dei.fofs;
if (en->ei.len < F2FS_MIN_EXTENT_LEN) {
__detach_extent_node(sbi, et, en);
insert_p = NULL;
insert_parent = NULL;
goto update;
}
if (__is_extent_same(&dei, &et->largest))
et->largest = en->ei;
goto next;
}
__detach_extent_node(sbi, et, en);
/*
* if we remove node in rb-tree, our parent node pointer may
* point the wrong place, discard them.
*/
insert_p = NULL;
insert_parent = NULL;
/* case#3, invalidate entire extent node |XXXXXXXXXX| */
if (pos <= dei.fofs && end >= dei.fofs + dei.len) {
if (__is_extent_same(&dei, &et->largest))
et->largest.len = 0;
goto update;
}
/*
* case#4, invalidate data in the middle of extent node
* |VVVXXXXVVV|
*/
if (dei.len > F2FS_MIN_EXTENT_LEN) {
unsigned int endofs;
/* insert left part of split extent into cache */
if (pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
set_extent_info(&ei, dei.fofs, dei.blk,
pos - dei.fofs);
en1 = __insert_extent_tree(sbi, et, &ei,
NULL, NULL);
}
/* insert right part of split extent into cache */
endofs = dei.fofs + dei.len;
if (endofs - end >= F2FS_MIN_EXTENT_LEN) {
set_extent_info(&ei, end,
end - dei.fofs + dei.blk,
endofs - end);
en2 = __insert_extent_tree(sbi, et, &ei,
NULL, NULL);
}
}
update:
/* 2.2 update in global extent list */
spin_lock(&sbi->extent_lock);
if (en && !list_empty(&en->list))
list_del(&en->list);
if (en1)
list_add_tail(&en1->list, &sbi->extent_list);
if (en2)
list_add_tail(&en2->list, &sbi->extent_list);
spin_unlock(&sbi->extent_lock);
/* 2.3 release extent node */
if (en)
kmem_cache_free(extent_node_slab, en);
next:
en = node ? rb_entry(node, struct extent_node, rb_node) : NULL;
next_en = en;
if (en)
pos = en->ei.fofs;
}
update_extent:
/* 3. update extent in extent cache */
if (blkaddr) {
struct extent_node *den = NULL;
set_extent_info(&ei, fofs, blkaddr, len);
en3 = __try_merge_extent_node(sbi, et, &ei, &den,
prev_en, next_en);
if (!en3)
en3 = __insert_extent_tree(sbi, et, &ei,
insert_p, insert_parent);
/* give up extent_cache, if split and small updates happen */
if (dei.len >= 1 &&
prev.len < F2FS_MIN_EXTENT_LEN &&
et->largest.len < F2FS_MIN_EXTENT_LEN) {
et->largest.len = 0;
set_inode_flag(F2FS_I(inode), FI_NO_EXTENT);
}
spin_lock(&sbi->extent_lock);
if (en3) {
if (list_empty(&en3->list))
list_add_tail(&en3->list, &sbi->extent_list);
else
list_move_tail(&en3->list, &sbi->extent_list);
}
if (den && !list_empty(&den->list))
list_del(&den->list);
spin_unlock(&sbi->extent_lock);
if (den)
kmem_cache_free(extent_node_slab, den);
}
if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
__free_extent_tree(sbi, et, true);
write_unlock(&et->lock);
return !__is_extent_same(&prev, &et->largest);
}
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
struct extent_node *en, *tmp;
unsigned long ino = F2FS_ROOT_INO(sbi);
struct radix_tree_root *root = &sbi->extent_tree_root;
unsigned int found;
unsigned int node_cnt = 0, tree_cnt = 0;
int remained;
if (!test_opt(sbi, EXTENT_CACHE))
return 0;
if (!down_write_trylock(&sbi->extent_tree_lock))
goto out;
/* 1. remove unreferenced extent tree */
while ((found = radix_tree_gang_lookup(root,
(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
unsigned i;
ino = treevec[found - 1]->ino + 1;
for (i = 0; i < found; i++) {
struct extent_tree *et = treevec[i];
if (!atomic_read(&et->refcount)) {
write_lock(&et->lock);
node_cnt += __free_extent_tree(sbi, et, true);
write_unlock(&et->lock);
radix_tree_delete(root, et->ino);
kmem_cache_free(extent_tree_slab, et);
sbi->total_ext_tree--;
tree_cnt++;
if (node_cnt + tree_cnt >= nr_shrink)
goto unlock_out;
}
}
}
up_write(&sbi->extent_tree_lock);
/* 2. remove LRU extent entries */
if (!down_write_trylock(&sbi->extent_tree_lock))
goto out;
remained = nr_shrink - (node_cnt + tree_cnt);
spin_lock(&sbi->extent_lock);
list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
if (!remained--)
break;
list_del_init(&en->list);
}
spin_unlock(&sbi->extent_lock);
while ((found = radix_tree_gang_lookup(root,
(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
unsigned i;
ino = treevec[found - 1]->ino + 1;
for (i = 0; i < found; i++) {
struct extent_tree *et = treevec[i];
write_lock(&et->lock);
node_cnt += __free_extent_tree(sbi, et, false);
write_unlock(&et->lock);
if (node_cnt + tree_cnt >= nr_shrink)
break;
}
}
unlock_out:
up_write(&sbi->extent_tree_lock);
out:
trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
return node_cnt + tree_cnt;
}
unsigned int f2fs_destroy_extent_node(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
unsigned int node_cnt = 0;
if (!et)
return 0;
write_lock(&et->lock);
node_cnt = __free_extent_tree(sbi, et, true);
write_unlock(&et->lock);
return node_cnt;
}
void f2fs_destroy_extent_tree(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct extent_tree *et = F2FS_I(inode)->extent_tree;
unsigned int node_cnt = 0;
if (!et)
return;
if (inode->i_nlink && !is_bad_inode(inode) && et->count) {
atomic_dec(&et->refcount);
return;
}
/* free all extent info belong to this extent tree */
node_cnt = f2fs_destroy_extent_node(inode);
/* delete extent tree entry in radix tree */
down_write(&sbi->extent_tree_lock);
atomic_dec(&et->refcount);
f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
kmem_cache_free(extent_tree_slab, et);
sbi->total_ext_tree--;
up_write(&sbi->extent_tree_lock);
F2FS_I(inode)->extent_tree = NULL;
trace_f2fs_destroy_extent_tree(inode, node_cnt);
}
bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
struct extent_info *ei)
{
if (!f2fs_may_extent_tree(inode))
return false;
return f2fs_lookup_extent_tree(inode, pgofs, ei);
}
void f2fs_update_extent_cache(struct dnode_of_data *dn)
{
struct f2fs_inode_info *fi = F2FS_I(dn->inode);
pgoff_t fofs;
if (!f2fs_may_extent_tree(dn->inode))
return;
f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
dn->ofs_in_node;
if (f2fs_update_extent_tree_range(dn->inode, fofs, dn->data_blkaddr, 1))
sync_inode_page(dn);
}
void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
pgoff_t fofs, block_t blkaddr, unsigned int len)
{
if (!f2fs_may_extent_tree(dn->inode))
return;
if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len))
sync_inode_page(dn);
}
void init_extent_cache_info(struct f2fs_sb_info *sbi)
{
INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
init_rwsem(&sbi->extent_tree_lock);
INIT_LIST_HEAD(&sbi->extent_list);
spin_lock_init(&sbi->extent_lock);
sbi->total_ext_tree = 0;
atomic_set(&sbi->total_ext_node, 0);
}
int __init create_extent_cache(void)
{
extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
sizeof(struct extent_tree));
if (!extent_tree_slab)
return -ENOMEM;
extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
sizeof(struct extent_node));
if (!extent_node_slab) {
kmem_cache_destroy(extent_tree_slab);
return -ENOMEM;
}
return 0;
}
void destroy_extent_cache(void)
{
kmem_cache_destroy(extent_node_slab);
kmem_cache_destroy(extent_tree_slab);
}
......@@ -19,6 +19,7 @@
#include <linux/magic.h>
#include <linux/kobject.h>
#include <linux/sched.h>
#include <linux/bio.h>
#ifdef CONFIG_F2FS_CHECK_FS
#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
......@@ -228,6 +229,7 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3)
#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
#define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6)
#define F2FS_IOC_SET_ENCRYPTION_POLICY \
_IOR('f', 19, struct f2fs_encryption_policy)
......@@ -320,7 +322,7 @@ enum {
*/
};
#define F2FS_LINK_MAX 32000 /* maximum link count per file */
#define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */
#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */
......@@ -349,6 +351,7 @@ struct extent_tree {
nid_t ino; /* inode number */
struct rb_root root; /* root of extent info rb-tree */
struct extent_node *cached_en; /* recently accessed extent node */
struct extent_info largest; /* largested extent info */
rwlock_t lock; /* protect extent info rb-tree */
atomic_t refcount; /* reference count of rb-tree */
unsigned int count; /* # of extent node in rb-tree*/
......@@ -372,6 +375,12 @@ struct f2fs_map_blocks {
unsigned int m_flags;
};
/* for flag in get_data_block */
#define F2FS_GET_BLOCK_READ 0
#define F2FS_GET_BLOCK_DIO 1
#define F2FS_GET_BLOCK_FIEMAP 2
#define F2FS_GET_BLOCK_BMAP 3
/*
* i_advise uses FADVISE_XXX_BIT. We can add additional hints later.
*/
......@@ -420,14 +429,13 @@ struct f2fs_inode_info {
unsigned int clevel; /* maximum level of given file name */
nid_t i_xattr_nid; /* node id that contains xattrs */
unsigned long long xattr_ver; /* cp version of xattr modification */
struct extent_info ext; /* in-memory extent cache entry */
rwlock_t ext_lock; /* rwlock for single extent cache */
struct inode_entry *dirty_dir; /* the pointer of dirty dir */
struct radix_tree_root inmem_root; /* radix tree for inmem pages */
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct mutex inmem_lock; /* lock for inmemory pages */
struct extent_tree *extent_tree; /* cached extent_tree entry */
#ifdef CONFIG_F2FS_FS_ENCRYPTION
/* Encryption params */
struct f2fs_crypt_info *i_crypt_info;
......@@ -779,7 +787,11 @@ struct f2fs_sb_info {
unsigned int segment_count[2]; /* # of allocated segments */
unsigned int block_count[2]; /* # of allocated blocks */
atomic_t inplace_count; /* # of inplace update */
int total_hit_ext, read_hit_ext; /* extent cache hit ratio */
atomic_t total_hit_ext; /* # of lookup extent cache */
atomic_t read_hit_rbtree; /* # of hit rbtree extent node */
atomic_t read_hit_largest; /* # of hit largest extent node */
atomic_t read_hit_cached; /* # of hit cached extent node */
atomic_t inline_xattr; /* # of inline_xattr inodes */
atomic_t inline_inode; /* # of inline_data inodes */
atomic_t inline_dir; /* # of inline_dentry inodes */
int bg_gc; /* background gc calls */
......@@ -791,6 +803,11 @@ struct f2fs_sb_info {
/* For sysfs suppport */
struct kobject s_kobj;
struct completion s_kobj_unregister;
/* For shrinker support */
struct list_head s_list;
struct mutex umount_mutex;
unsigned int shrinker_run_no;
};
/*
......@@ -1039,7 +1056,8 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type)
static inline void inode_dec_dirty_pages(struct inode *inode)
{
if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
!S_ISLNK(inode->i_mode))
return;
atomic_dec(&F2FS_I(inode)->dirty_pages);
......@@ -1234,16 +1252,24 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
gfp_t flags)
{
void *entry;
retry:
entry = kmem_cache_alloc(cachep, flags);
if (!entry) {
cond_resched();
goto retry;
}
entry = kmem_cache_alloc(cachep, flags);
if (!entry)
entry = kmem_cache_alloc(cachep, flags | __GFP_NOFAIL);
return entry;
}
static inline struct bio *f2fs_bio_alloc(int npages)
{
struct bio *bio;
/* No failure on bio allocation */
bio = bio_alloc(GFP_NOIO, npages);
if (!bio)
bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages);
return bio;
}
static inline void f2fs_radix_tree_insert(struct radix_tree_root *root,
unsigned long index, void *item)
{
......@@ -1342,6 +1368,7 @@ enum {
FI_INC_LINK, /* need to increment i_nlink */
FI_ACL_MODE, /* indicate acl mode */
FI_NO_ALLOC, /* should not allocate any blocks */
FI_FREE_NID, /* free allocated nide */
FI_UPDATE_DIR, /* should update inode block for consistency */
FI_DELAY_IPUT, /* used for the recovery */
FI_NO_EXTENT, /* not to use the extent cache */
......@@ -1541,6 +1568,17 @@ static inline bool is_dot_dotdot(const struct qstr *str)
return false;
}
static inline bool f2fs_may_extent_tree(struct inode *inode)
{
mode_t mode = inode->i_mode;
if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) ||
is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
return false;
return S_ISREG(mode);
}
#define get_inode_mode(i) \
((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
......@@ -1557,7 +1595,7 @@ static inline bool is_dot_dotdot(const struct qstr *str)
int f2fs_sync_file(struct file *, loff_t, loff_t, int);
void truncate_data_blocks(struct dnode_of_data *);
int truncate_blocks(struct inode *, u64, bool);
void f2fs_truncate(struct inode *);
int f2fs_truncate(struct inode *, bool);
int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
int f2fs_setattr(struct dentry *, struct iattr *);
int truncate_hole(struct inode *, pgoff_t, pgoff_t);
......@@ -1649,7 +1687,7 @@ int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
int truncate_inode_blocks(struct inode *, pgoff_t);
int truncate_xattr_node(struct inode *, struct page *);
int wait_on_node_pages_writeback(struct f2fs_sb_info *, nid_t);
void remove_inode_page(struct inode *);
int remove_inode_page(struct inode *);
struct page *new_inode_page(struct inode *);
struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *);
void ra_node_page(struct f2fs_sb_info *, nid_t);
......@@ -1660,6 +1698,7 @@ int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *);
bool alloc_nid(struct f2fs_sb_info *, nid_t *);
void alloc_nid_done(struct f2fs_sb_info *, nid_t);
void alloc_nid_failed(struct f2fs_sb_info *, nid_t);
int try_to_free_nids(struct f2fs_sb_info *, int);
void recover_inline_xattr(struct inode *, struct page *);
void recover_xattr_data(struct inode *, struct page *, block_t);
int recover_inode_page(struct f2fs_sb_info *, struct page *);
......@@ -1675,7 +1714,7 @@ void destroy_node_manager_caches(void);
* segment.c
*/
void register_inmem_page(struct inode *, struct page *);
void commit_inmem_pages(struct inode *, bool);
int commit_inmem_pages(struct inode *, bool);
void f2fs_balance_fs(struct f2fs_sb_info *);
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
int f2fs_issue_flush(struct f2fs_sb_info *);
......@@ -1685,7 +1724,7 @@ void invalidate_blocks(struct f2fs_sb_info *, block_t);
void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t);
void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *);
void release_discard_addrs(struct f2fs_sb_info *);
void discard_next_dnode(struct f2fs_sb_info *, block_t);
bool discard_next_dnode(struct f2fs_sb_info *, block_t);
int npages_for_summary_flush(struct f2fs_sb_info *, bool);
void allocate_new_segments(struct f2fs_sb_info *);
int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *);
......@@ -1727,7 +1766,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *);
void release_orphan_inode(struct f2fs_sb_info *);
void add_orphan_inode(struct f2fs_sb_info *, nid_t);
void remove_orphan_inode(struct f2fs_sb_info *, nid_t);
void recover_orphan_inodes(struct f2fs_sb_info *);
int recover_orphan_inodes(struct f2fs_sb_info *);
int get_valid_checkpoint(struct f2fs_sb_info *);
void update_dirty_page(struct inode *, struct page *);
void add_dirty_dir_inode(struct inode *);
......@@ -1746,21 +1785,14 @@ int f2fs_submit_page_bio(struct f2fs_io_info *);
void f2fs_submit_page_mbio(struct f2fs_io_info *);
void set_data_blkaddr(struct dnode_of_data *);
int reserve_new_block(struct dnode_of_data *);
int f2fs_get_block(struct dnode_of_data *, pgoff_t);
int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
void f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
void f2fs_destroy_extent_tree(struct inode *);
void f2fs_init_extent_cache(struct inode *, struct f2fs_extent *);
void f2fs_update_extent_cache(struct dnode_of_data *);
void f2fs_preserve_extent_tree(struct inode *);
struct page *get_read_data_page(struct inode *, pgoff_t, int);
struct page *find_data_page(struct inode *, pgoff_t);
struct page *get_lock_data_page(struct inode *, pgoff_t);
struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool);
int do_write_data_page(struct f2fs_io_info *);
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64);
void init_extent_cache_info(struct f2fs_sb_info *);
int __init create_extent_cache(void);
void destroy_extent_cache(void);
void f2fs_invalidate_page(struct page *, unsigned int, unsigned int);
int f2fs_release_page(struct page *, gfp_t);
......@@ -1788,11 +1820,13 @@ struct f2fs_stat_info {
struct f2fs_sb_info *sbi;
int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
int main_area_segs, main_area_sections, main_area_zones;
int hit_ext, total_ext, ext_tree, ext_node;
int hit_largest, hit_cached, hit_rbtree, hit_total, total_ext;
int ext_tree, ext_node;
int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta;
int nats, dirty_nats, sits, dirty_sits, fnids;
int total_count, utilization;
int bg_gc, inline_inode, inline_dir, inmem_pages, wb_pages;
int bg_gc, inmem_pages, wb_pages;
int inline_xattr, inline_inode, inline_dir;
unsigned int valid_count, valid_node_count, valid_inode_count;
unsigned int bimodal, avg_vblocks;
int util_free, util_valid, util_invalid;
......@@ -1823,8 +1857,20 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++)
#define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++)
#define stat_dec_dirty_dir(sbi) ((sbi)->n_dirty_dirs--)
#define stat_inc_total_hit(sb) ((F2FS_SB(sb))->total_hit_ext++)
#define stat_inc_read_hit(sb) ((F2FS_SB(sb))->read_hit_ext++)
#define stat_inc_total_hit(sbi) (atomic_inc(&(sbi)->total_hit_ext))
#define stat_inc_rbtree_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_rbtree))
#define stat_inc_largest_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_largest))
#define stat_inc_cached_node_hit(sbi) (atomic_inc(&(sbi)->read_hit_cached))
#define stat_inc_inline_xattr(inode) \
do { \
if (f2fs_has_inline_xattr(inode)) \
(atomic_inc(&F2FS_I_SB(inode)->inline_xattr)); \
} while (0)
#define stat_dec_inline_xattr(inode) \
do { \
if (f2fs_has_inline_xattr(inode)) \
(atomic_dec(&F2FS_I_SB(inode)->inline_xattr)); \
} while (0)
#define stat_inc_inline_inode(inode) \
do { \
if (f2fs_has_inline_data(inode)) \
......@@ -1894,7 +1940,11 @@ void f2fs_destroy_root_stats(void);
#define stat_inc_dirty_dir(sbi)
#define stat_dec_dirty_dir(sbi)
#define stat_inc_total_hit(sb)
#define stat_inc_read_hit(sb)
#define stat_inc_rbtree_node_hit(sb)
#define stat_inc_largest_node_hit(sbi)
#define stat_inc_cached_node_hit(sbi)
#define stat_inc_inline_xattr(inode)
#define stat_dec_inline_xattr(inode)
#define stat_inc_inline_inode(inode)
#define stat_dec_inline_inode(inode)
#define stat_inc_inline_dir(inode)
......@@ -1949,6 +1999,30 @@ bool f2fs_empty_inline_dir(struct inode *);
int f2fs_read_inline_dir(struct file *, struct dir_context *,
struct f2fs_str *);
/*
* shrinker.c
*/
unsigned long f2fs_shrink_count(struct shrinker *, struct shrink_control *);
unsigned long f2fs_shrink_scan(struct shrinker *, struct shrink_control *);
void f2fs_join_shrinker(struct f2fs_sb_info *);
void f2fs_leave_shrinker(struct f2fs_sb_info *);
/*
* extent_cache.c
*/
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int);
void f2fs_drop_largest_extent(struct inode *, pgoff_t);
void f2fs_init_extent_tree(struct inode *, struct f2fs_extent *);
unsigned int f2fs_destroy_extent_node(struct inode *);
void f2fs_destroy_extent_tree(struct inode *);
bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *);
void f2fs_update_extent_cache(struct dnode_of_data *);
void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
pgoff_t, block_t, unsigned int);
void init_extent_cache_info(struct f2fs_sb_info *);
int __init create_extent_cache(void);
void destroy_extent_cache(void);
/*
* crypto support
*/
......
......@@ -27,6 +27,7 @@
#include "segment.h"
#include "xattr.h"
#include "acl.h"
#include "gc.h"
#include "trace.h"
#include <trace/events/f2fs.h>
......@@ -85,6 +86,8 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
mapped:
/* fill the page */
f2fs_wait_on_page_writeback(page, DATA);
/* if gced page is attached, don't write to cold segment */
clear_cold_data(page);
out:
sb_end_pagefault(inode->i_sb);
return block_page_mkwrite_return(err);
......@@ -203,8 +206,8 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
}
/* if the inode is dirty, let's recover all the time */
if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) {
update_inode_page(inode);
if (!datasync) {
f2fs_write_inode(inode, NULL);
goto go_write;
}
......@@ -442,9 +445,9 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
{
int nr_free = 0, ofs = dn->ofs_in_node;
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
struct f2fs_node *raw_node;
int nr_free = 0, ofs = dn->ofs_in_node, len = count;
__le32 *addr;
raw_node = F2FS_NODE(dn->node_page);
......@@ -457,14 +460,22 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
dn->data_blkaddr = NULL_ADDR;
set_data_blkaddr(dn);
f2fs_update_extent_cache(dn);
invalidate_blocks(sbi, blkaddr);
if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page))
clear_inode_flag(F2FS_I(dn->inode),
FI_FIRST_BLOCK_WRITTEN);
nr_free++;
}
if (nr_free) {
pgoff_t fofs;
/*
* once we invalidate valid blkaddr in range [ofs, ofs + count],
* we will invalidate all blkaddr in the whole range.
*/
fofs = start_bidx_of_node(ofs_of_node(dn->node_page),
F2FS_I(dn->inode)) + ofs;
f2fs_update_extent_cache_range(dn, fofs, 0, len);
dec_valid_block_count(sbi, dn->inode, nr_free);
set_page_dirty(dn->node_page);
sync_inode_page(dn);
......@@ -576,24 +587,30 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
return err;
}
void f2fs_truncate(struct inode *inode)
int f2fs_truncate(struct inode *inode, bool lock)
{
int err;
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
return;
return 0;
trace_f2fs_truncate(inode);
/* we should check inline_data size */
if (f2fs_has_inline_data(inode) && !f2fs_may_inline_data(inode)) {
if (f2fs_convert_inline_inode(inode))
return;
err = f2fs_convert_inline_inode(inode);
if (err)
return err;
}
if (!truncate_blocks(inode, i_size_read(inode), true)) {
err = truncate_blocks(inode, i_size_read(inode), lock);
if (err)
return err;
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
}
return 0;
}
int f2fs_getattr(struct vfsmount *mnt,
......@@ -653,7 +670,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_size <= i_size_read(inode)) {
truncate_setsize(inode, attr->ia_size);
f2fs_truncate(inode);
err = f2fs_truncate(inode, true);
if (err)
return err;
f2fs_balance_fs(F2FS_I_SB(inode));
} else {
/*
......@@ -692,14 +711,14 @@ const struct inode_operations f2fs_file_inode_operations = {
.fiemap = f2fs_fiemap,
};
static void fill_zero(struct inode *inode, pgoff_t index,
static int fill_zero(struct inode *inode, pgoff_t index,
loff_t start, loff_t len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *page;
if (!len)
return;
return 0;
f2fs_balance_fs(sbi);
......@@ -707,12 +726,14 @@ static void fill_zero(struct inode *inode, pgoff_t index,
page = get_new_data_page(inode, NULL, index, false);
f2fs_unlock_op(sbi);
if (!IS_ERR(page)) {
if (IS_ERR(page))
return PTR_ERR(page);
f2fs_wait_on_page_writeback(page, DATA);
zero_user(page, start, len);
set_page_dirty(page);
f2fs_put_page(page, 1);
}
return 0;
}
int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
......@@ -760,14 +781,22 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
if (pg_start == pg_end) {
fill_zero(inode, pg_start, off_start,
ret = fill_zero(inode, pg_start, off_start,
off_end - off_start);
if (ret)
return ret;
} else {
if (off_start)
fill_zero(inode, pg_start++, off_start,
if (off_start) {
ret = fill_zero(inode, pg_start++, off_start,
PAGE_CACHE_SIZE - off_start);
if (off_end)
fill_zero(inode, pg_end, 0, off_end);
if (ret)
return ret;
}
if (off_end) {
ret = fill_zero(inode, pg_end, 0, off_end);
if (ret)
return ret;
}
if (pg_start < pg_end) {
struct address_space *mapping = inode->i_mapping;
......@@ -797,11 +826,11 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
int ret = 0;
f2fs_lock_op(sbi);
for (; end < nrpages; start++, end++) {
block_t new_addr, old_addr;
f2fs_lock_op(sbi);
set_new_dnode(&dn, inode, NULL, NULL, 0);
ret = get_dnode_of_data(&dn, end, LOOKUP_NODE_RA);
if (ret && ret != -ENOENT) {
......@@ -817,13 +846,16 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
if (new_addr == NULL_ADDR) {
set_new_dnode(&dn, inode, NULL, NULL, 0);
ret = get_dnode_of_data(&dn, start, LOOKUP_NODE_RA);
if (ret && ret != -ENOENT)
if (ret && ret != -ENOENT) {
goto out;
else if (ret == -ENOENT)
} else if (ret == -ENOENT) {
f2fs_unlock_op(sbi);
continue;
}
if (dn.data_blkaddr == NULL_ADDR) {
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
continue;
} else {
truncate_data_blocks_range(&dn, 1);
......@@ -862,8 +894,9 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end)
f2fs_put_dnode(&dn);
}
f2fs_unlock_op(sbi);
}
ret = 0;
return 0;
out:
f2fs_unlock_op(sbi);
return ret;
......@@ -885,6 +918,14 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (offset & (F2FS_BLKSIZE - 1) || len & (F2FS_BLKSIZE - 1))
return -EINVAL;
f2fs_balance_fs(F2FS_I_SB(inode));
if (f2fs_has_inline_data(inode)) {
ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
}
pg_start = offset >> PAGE_CACHE_SHIFT;
pg_end = (offset + len) >> PAGE_CACHE_SHIFT;
......@@ -946,14 +987,21 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
if (pg_start == pg_end) {
fill_zero(inode, pg_start, off_start, off_end - off_start);
ret = fill_zero(inode, pg_start, off_start,
off_end - off_start);
if (ret)
return ret;
if (offset + len > new_size)
new_size = offset + len;
new_size = max_t(loff_t, new_size, offset + len);
} else {
if (off_start) {
fill_zero(inode, pg_start++, off_start,
ret = fill_zero(inode, pg_start++, off_start,
PAGE_CACHE_SIZE - off_start);
if (ret)
return ret;
new_size = max_t(loff_t, new_size,
pg_start << PAGE_CACHE_SHIFT);
}
......@@ -995,7 +1043,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
}
if (off_end) {
fill_zero(inode, pg_end, 0, off_end);
ret = fill_zero(inode, pg_end, 0, off_end);
if (ret)
goto out;
new_size = max_t(loff_t, new_size, offset + len);
}
}
......@@ -1033,6 +1084,12 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_balance_fs(sbi);
if (f2fs_has_inline_data(inode)) {
ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
}
ret = truncate_blocks(inode, i_size_read(inode), true);
if (ret)
return ret;
......@@ -1302,6 +1359,7 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
static int f2fs_ioc_start_atomic_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
if (!inode_owner_or_capable(inode))
return -EACCES;
......@@ -1311,9 +1369,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
if (f2fs_is_atomic_file(inode))
return 0;
set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
return f2fs_convert_inline_inode(inode);
set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
return 0;
}
static int f2fs_ioc_commit_atomic_write(struct file *filp)
......@@ -1333,10 +1394,13 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
if (f2fs_is_atomic_file(inode)) {
clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
commit_inmem_pages(inode, false);
ret = commit_inmem_pages(inode, false);
if (ret)
goto err_out;
}
ret = f2fs_sync_file(filp, 0, LONG_MAX, 0);
ret = f2fs_sync_file(filp, 0, LLONG_MAX, 0);
err_out:
mnt_drop_write_file(filp);
return ret;
}
......@@ -1344,6 +1408,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
static int f2fs_ioc_start_volatile_write(struct file *filp)
{
struct inode *inode = file_inode(filp);
int ret;
if (!inode_owner_or_capable(inode))
return -EACCES;
......@@ -1351,9 +1416,12 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
if (f2fs_is_volatile_file(inode))
return 0;
set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
ret = f2fs_convert_inline_inode(inode);
if (ret)
return ret;
return f2fs_convert_inline_inode(inode);
set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
return 0;
}
static int f2fs_ioc_release_volatile_write(struct file *filp)
......@@ -1389,7 +1457,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
if (f2fs_is_atomic_file(inode)) {
clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
commit_inmem_pages(inode, false);
commit_inmem_pages(inode, true);
}
if (f2fs_is_volatile_file(inode))
......@@ -1544,6 +1612,35 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
return 0;
}
static int f2fs_ioc_gc(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
__u32 i, count;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (get_user(count, (__u32 __user *)arg))
return -EFAULT;
if (!count || count > F2FS_BATCH_GC_MAX_NUM)
return -EINVAL;
for (i = 0; i < count; i++) {
if (!mutex_trylock(&sbi->gc_mutex))
break;
if (f2fs_gc(sbi))
break;
}
if (put_user(i, (__u32 __user *)arg))
return -EFAULT;
return 0;
}
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
......@@ -1573,6 +1670,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_get_encryption_policy(filp, arg);
case F2FS_IOC_GET_ENCRYPTION_PWSALT:
return f2fs_ioc_get_encryption_pwsalt(filp, arg);
case F2FS_IOC_GARBAGE_COLLECT:
return f2fs_ioc_gc(filp, arg);
default:
return -ENOTTY;
}
......
......@@ -391,23 +391,27 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
* On validity, copy that node with cold status, otherwise (invalid node)
* ignore that.
*/
static void gc_node_segment(struct f2fs_sb_info *sbi,
static int gc_node_segment(struct f2fs_sb_info *sbi,
struct f2fs_summary *sum, unsigned int segno, int gc_type)
{
bool initial = true;
struct f2fs_summary *entry;
block_t start_addr;
int off;
start_addr = START_BLOCK(sbi, segno);
next_step:
entry = sum;
for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
nid_t nid = le32_to_cpu(entry->nid);
struct page *node_page;
struct node_info ni;
/* stop BG_GC if there is not enough free sections. */
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
return;
return 0;
if (check_valid_map(sbi, segno, off) == 0)
continue;
......@@ -426,6 +430,12 @@ static void gc_node_segment(struct f2fs_sb_info *sbi,
continue;
}
get_node_info(sbi, nid, &ni);
if (ni.blk_addr != start_addr + off) {
f2fs_put_page(node_page, 1);
continue;
}
/* set page dirty and write it */
if (gc_type == FG_GC) {
f2fs_wait_on_page_writeback(node_page, NODE);
......@@ -451,13 +461,11 @@ static void gc_node_segment(struct f2fs_sb_info *sbi,
};
sync_node_pages(sbi, 0, &wbc);
/*
* In the case of FG_GC, it'd be better to reclaim this victim
* completely.
*/
if (get_valid_blocks(sbi, segno, 1) != 0)
goto next_step;
/* return 1 only if FG_GC succefully reclaimed one */
if (get_valid_blocks(sbi, segno, 1) == 0)
return 1;
}
return 0;
}
/*
......@@ -487,7 +495,7 @@ block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi);
}
static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
struct node_info *dni, block_t blkaddr, unsigned int *nofs)
{
struct page *node_page;
......@@ -500,13 +508,13 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
node_page = get_node_page(sbi, nid);
if (IS_ERR(node_page))
return 0;
return false;
get_node_info(sbi, nid, dni);
if (sum->version != dni->version) {
f2fs_put_page(node_page, 1);
return 0;
return false;
}
*nofs = ofs_of_node(node_page);
......@@ -514,8 +522,8 @@ static int check_dnode(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
f2fs_put_page(node_page, 1);
if (source_blkaddr != blkaddr)
return 0;
return 1;
return false;
return true;
}
static void move_encrypted_block(struct inode *inode, block_t bidx)
......@@ -552,7 +560,10 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
fio.page = page;
fio.blk_addr = dn.data_blkaddr;
fio.encrypted_page = grab_cache_page(META_MAPPING(fio.sbi), fio.blk_addr);
fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi),
fio.blk_addr,
FGP_LOCK|FGP_CREAT,
GFP_NOFS);
if (!fio.encrypted_page)
goto put_out;
......@@ -636,7 +647,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
* If the parent node is not valid or the data block address is different,
* the victim data block is ignored.
*/
static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
{
struct super_block *sb = sbi->sb;
......@@ -659,7 +670,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
/* stop BG_GC if there is not enough free sections. */
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
return;
return 0;
if (check_valid_map(sbi, segno, off) == 0)
continue;
......@@ -670,7 +681,7 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
}
/* Get an inode by ino with checking validity */
if (check_dnode(sbi, entry, &dni, start_addr + off, &nofs) == 0)
if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs))
continue;
if (phase == 1) {
......@@ -724,15 +735,11 @@ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (gc_type == FG_GC) {
f2fs_submit_merged_bio(sbi, DATA, WRITE);
/*
* In the case of FG_GC, it'd be better to reclaim this victim
* completely.
*/
if (get_valid_blocks(sbi, segno, 1) != 0) {
phase = 2;
goto next_step;
}
/* return 1 only if FG_GC succefully reclaimed one */
if (get_valid_blocks(sbi, segno, 1) == 0)
return 1;
}
return 0;
}
static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
......@@ -748,12 +755,13 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
return ret;
}
static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
struct gc_inode_list *gc_list, int gc_type)
{
struct page *sum_page;
struct f2fs_summary_block *sum;
struct blk_plug plug;
int nfree = 0;
/* read segment summary of victim */
sum_page = get_sum_page(sbi, segno);
......@@ -773,10 +781,11 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
switch (GET_SUM_TYPE((&sum->footer))) {
case SUM_TYPE_NODE:
gc_node_segment(sbi, sum->entries, segno, gc_type);
nfree = gc_node_segment(sbi, sum->entries, segno, gc_type);
break;
case SUM_TYPE_DATA:
gc_data_segment(sbi, sum->entries, gc_list, segno, gc_type);
nfree = gc_data_segment(sbi, sum->entries, gc_list,
segno, gc_type);
break;
}
blk_finish_plug(&plug);
......@@ -785,11 +794,13 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
stat_inc_call_count(sbi->stat_info);
f2fs_put_page(sum_page, 0);
return nfree;
}
int f2fs_gc(struct f2fs_sb_info *sbi)
{
unsigned int segno, i;
unsigned int segno = NULL_SEGNO;
unsigned int i;
int gc_type = BG_GC;
int nfree = 0;
int ret = -1;
......@@ -808,10 +819,11 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) {
gc_type = FG_GC;
if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi))
write_checkpoint(sbi, &cpc);
}
if (!__get_victim(sbi, &segno, gc_type))
if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
goto stop;
ret = 0;
......@@ -821,13 +833,10 @@ int f2fs_gc(struct f2fs_sb_info *sbi)
META_SSA);
for (i = 0; i < sbi->segs_per_sec; i++)
do_garbage_collect(sbi, segno + i, &gc_list, gc_type);
nfree += do_garbage_collect(sbi, segno + i, &gc_list, gc_type);
if (gc_type == FG_GC) {
if (gc_type == FG_GC)
sbi->cur_victim_sec = NULL_SEGNO;
nfree++;
WARN_ON(get_valid_blocks(sbi, segno, sbi->segs_per_sec));
}
if (has_not_enough_free_secs(sbi, nfree))
goto gc_more;
......
......@@ -19,6 +19,12 @@
#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
/*
* with this macro, we can control the max time we do garbage collection,
* when user triggers batch mode gc by ioctl.
*/
#define F2FS_BATCH_GC_MAX_NUM 16
/* Search max. number of dirty segments to select a victim segment */
#define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
......
......@@ -360,6 +360,10 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent,
return 0;
}
/*
* NOTE: ipage is grabbed by caller, but if any error occurs, we should
* release ipage in this function.
*/
static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
struct f2fs_inline_dentry *inline_dentry)
{
......@@ -369,8 +373,10 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
int err;
page = grab_cache_page(dir->i_mapping, 0);
if (!page)
if (!page) {
f2fs_put_page(ipage, 1);
return -ENOMEM;
}
set_new_dnode(&dn, dir, ipage, NULL, 0);
err = f2fs_reserve_block(&dn, 0);
......@@ -378,13 +384,21 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
goto out;
f2fs_wait_on_page_writeback(page, DATA);
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
dentry_blk = kmap_atomic(page);
/* copy data from inline dentry block to new dentry block */
memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap,
INLINE_DENTRY_BITMAP_SIZE);
memset(dentry_blk->dentry_bitmap + INLINE_DENTRY_BITMAP_SIZE, 0,
SIZE_OF_DENTRY_BITMAP - INLINE_DENTRY_BITMAP_SIZE);
/*
* we do not need to zero out remainder part of dentry and filename
* field, since we have used bitmap for marking the usage status of
* them, besides, we can also ignore copying/zeroing reserved space
* of dentry block, because them haven't been used so far.
*/
memcpy(dentry_blk->dentry, inline_dentry->dentry,
sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY);
memcpy(dentry_blk->filename, inline_dentry->filename,
......@@ -434,7 +448,8 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
slots, NR_INLINE_DENTRY);
if (bit_pos >= NR_INLINE_DENTRY) {
err = f2fs_convert_inline_dir(dir, ipage, dentry_blk);
if (!err)
if (err)
return err;
err = -EAGAIN;
goto out;
}
......
......@@ -12,7 +12,6 @@
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/bitops.h>
#include "f2fs.h"
#include "node.h"
......@@ -34,8 +33,8 @@ void f2fs_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & FS_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
set_mask_bits(&inode->i_flags,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl);
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
}
static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri)
......@@ -139,7 +138,7 @@ static int do_read_inode(struct inode *inode)
fi->i_pino = le32_to_cpu(ri->i_pino);
fi->i_dir_level = ri->i_dir_level;
f2fs_init_extent_cache(inode, &ri->i_ext);
f2fs_init_extent_tree(inode, &ri->i_ext);
get_inline_info(fi, ri);
......@@ -155,6 +154,7 @@ static int do_read_inode(struct inode *inode)
f2fs_put_page(node_page, 1);
stat_inc_inline_xattr(inode);
stat_inc_inline_inode(inode);
stat_inc_inline_dir(inode);
......@@ -237,10 +237,11 @@ void update_inode(struct inode *inode, struct page *node_page)
ri->i_size = cpu_to_le64(i_size_read(inode));
ri->i_blocks = cpu_to_le64(inode->i_blocks);
read_lock(&F2FS_I(inode)->ext_lock);
set_raw_extent(&F2FS_I(inode)->ext, &ri->i_ext);
read_unlock(&F2FS_I(inode)->ext_lock);
if (F2FS_I(inode)->extent_tree)
set_raw_extent(&F2FS_I(inode)->extent_tree->largest,
&ri->i_ext);
else
memset(&ri->i_ext, 0, sizeof(ri->i_ext));
set_raw_inline(F2FS_I(inode), ri);
ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
......@@ -314,7 +315,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
void f2fs_evict_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
struct f2fs_inode_info *fi = F2FS_I(inode);
nid_t xnid = fi->i_xattr_nid;
int err = 0;
/* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode))
......@@ -330,41 +333,62 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_bug_on(sbi, get_dirty_pages(inode));
remove_dirty_dir_inode(inode);
f2fs_destroy_extent_tree(inode);
if (inode->i_nlink || is_bad_inode(inode))
goto no_delete;
sb_start_intwrite(inode->i_sb);
set_inode_flag(F2FS_I(inode), FI_NO_ALLOC);
set_inode_flag(fi, FI_NO_ALLOC);
i_size_write(inode, 0);
if (F2FS_HAS_BLOCKS(inode))
f2fs_truncate(inode);
err = f2fs_truncate(inode, true);
if (!err) {
f2fs_lock_op(sbi);
remove_inode_page(inode);
err = remove_inode_page(inode);
f2fs_unlock_op(sbi);
}
sb_end_intwrite(inode->i_sb);
no_delete:
stat_dec_inline_xattr(inode);
stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode);
/* update extent info in inode */
if (inode->i_nlink)
f2fs_preserve_extent_tree(inode);
f2fs_destroy_extent_tree(inode);
invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino);
if (xnid)
invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid);
if (is_inode_flag_set(F2FS_I(inode), FI_APPEND_WRITE))
if (is_inode_flag_set(fi, FI_APPEND_WRITE))
add_dirty_inode(sbi, inode->i_ino, APPEND_INO);
if (is_inode_flag_set(F2FS_I(inode), FI_UPDATE_WRITE))
if (is_inode_flag_set(fi, FI_UPDATE_WRITE))
add_dirty_inode(sbi, inode->i_ino, UPDATE_INO);
if (is_inode_flag_set(fi, FI_FREE_NID)) {
if (err && err != -ENOENT)
alloc_nid_done(sbi, inode->i_ino);
else
alloc_nid_failed(sbi, inode->i_ino);
clear_inode_flag(fi, FI_FREE_NID);
}
if (err && err != -ENOENT) {
if (!exist_written_data(sbi, inode->i_ino, ORPHAN_INO)) {
/*
* get here because we failed to release resource
* of inode previously, reminder our user to run fsck
* for fixing.
*/
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"inode (ino:%lu) resource leak, run fsck "
"to fix this issue!", inode->i_ino);
}
}
out_clear:
#ifdef CONFIG_F2FS_FS_ENCRYPTION
if (F2FS_I(inode)->i_crypt_info)
f2fs_free_encryption_info(inode, F2FS_I(inode)->i_crypt_info);
if (fi->i_crypt_info)
f2fs_free_encryption_info(inode, fi->i_crypt_info);
#endif
clear_inode(inode);
}
......@@ -373,6 +397,7 @@ void f2fs_evict_inode(struct inode *inode)
void handle_failed_inode(struct inode *inode)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
int err = 0;
clear_nlink(inode);
make_bad_inode(inode);
......@@ -380,13 +405,29 @@ void handle_failed_inode(struct inode *inode)
i_size_write(inode, 0);
if (F2FS_HAS_BLOCKS(inode))
f2fs_truncate(inode);
err = f2fs_truncate(inode, false);
remove_inode_page(inode);
if (!err)
err = remove_inode_page(inode);
clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
alloc_nid_failed(sbi, inode->i_ino);
/*
* if we skip truncate_node in remove_inode_page bacause we failed
* before, it's better to find another way to release resource of
* this inode (e.g. valid block count, node block or nid). Here we
* choose to add this inode to orphan list, so that we can call iput
* for releasing in orphan recovery flow.
*
* Note: we should add inode to orphan list before f2fs_unlock_op()
* so we can prevent losing this orphan when encoutering checkpoint
* and following suddenly power-off.
*/
if (err && err != -ENOENT) {
err = acquire_orphan_inode(sbi);
if (!err)
add_orphan_inode(sbi, inode->i_ino);
}
set_inode_flag(F2FS_I(inode), FI_FREE_NID);
f2fs_unlock_op(sbi);
/* iput will drop the inode object */
......
......@@ -53,7 +53,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
if (err) {
err = -EINVAL;
nid_free = true;
goto out;
goto fail;
}
/* If the directory encrypted, then we should encrypt the inode. */
......@@ -65,6 +65,9 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
if (f2fs_may_inline_dentry(inode))
set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY);
f2fs_init_extent_tree(inode, NULL);
stat_inc_inline_xattr(inode);
stat_inc_inline_inode(inode);
stat_inc_inline_dir(inode);
......@@ -72,15 +75,12 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
mark_inode_dirty(inode);
return inode;
out:
clear_nlink(inode);
unlock_new_inode(inode);
fail:
trace_f2fs_new_inode(inode, err);
make_bad_inode(inode);
iput(inode);
if (nid_free)
alloc_nid_failed(sbi, ino);
set_inode_flag(F2FS_I(inode), FI_FREE_NID);
iput(inode);
return ERR_PTR(err);
}
......@@ -89,7 +89,14 @@ static int is_multimedia_file(const unsigned char *s, const char *sub)
size_t slen = strlen(s);
size_t sublen = strlen(sub);
if (sublen > slen)
/*
* filename format of multimedia file should be defined as:
* "filename + '.' + extension".
*/
if (slen < sublen + 2)
return 0;
if (s[slen - sublen - 1] != '.')
return 0;
return !strncasecmp(s + slen - sublen, sub, sublen);
......
......@@ -159,7 +159,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
head = radix_tree_lookup(&nm_i->nat_set_root, set);
if (!head) {
head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS);
INIT_LIST_HEAD(&head->entry_list);
INIT_LIST_HEAD(&head->set_list);
......@@ -246,7 +246,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
{
struct nat_entry *new;
new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC);
new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_NOFS);
f2fs_radix_tree_insert(&nm_i->nat_root, nid, new);
memset(new, 0, sizeof(struct nat_entry));
nat_set_nid(new, nid);
......@@ -306,6 +306,10 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
unsigned char version = nat_get_version(e);
nat_set_version(e, inc_node_version(version));
/* in order to reuse the nid */
if (nm_i->next_scan_nid > ni->nid)
nm_i->next_scan_nid = ni->nid;
}
/* change address */
......@@ -328,11 +332,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
int nr = nr_shrink;
if (available_free_memory(sbi, NAT_ENTRIES))
if (!down_write_trylock(&nm_i->nat_tree_lock))
return 0;
down_write(&nm_i->nat_tree_lock);
while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
struct nat_entry *ne;
ne = list_first_entry(&nm_i->nat_entries,
......@@ -341,7 +345,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
nr_shrink--;
}
up_write(&nm_i->nat_tree_lock);
return nr_shrink;
return nr - nr_shrink;
}
/*
......@@ -898,17 +902,20 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
* Caller should grab and release a rwsem by calling f2fs_lock_op() and
* f2fs_unlock_op().
*/
void remove_inode_page(struct inode *inode)
int remove_inode_page(struct inode *inode)
{
struct dnode_of_data dn;
int err;
set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
if (get_dnode_of_data(&dn, 0, LOOKUP_NODE))
return;
err = get_dnode_of_data(&dn, 0, LOOKUP_NODE);
if (err)
return err;
if (truncate_xattr_node(inode, dn.inode_page)) {
err = truncate_xattr_node(inode, dn.inode_page);
if (err) {
f2fs_put_dnode(&dn);
return;
return err;
}
/* remove potential inline_data blocks */
......@@ -922,6 +929,7 @@ void remove_inode_page(struct inode *inode)
/* will put inode & node pages */
truncate_node(&dn);
return 0;
}
struct page *new_inode_page(struct inode *inode)
......@@ -991,8 +999,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
/*
* Caller should do after getting the following values.
* 0: f2fs_put_page(page, 0)
* LOCKED_PAGE: f2fs_put_page(page, 1)
* error: nothing
* LOCKED_PAGE or error: f2fs_put_page(page, 1)
*/
static int read_node_page(struct page *page, int rw)
{
......@@ -1010,7 +1017,6 @@ static int read_node_page(struct page *page, int rw)
if (unlikely(ni.blk_addr == NULL_ADDR)) {
ClearPageUptodate(page);
f2fs_put_page(page, 1);
return -ENOENT;
}
......@@ -1041,10 +1047,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
return;
err = read_node_page(apage, READA);
if (err == 0)
f2fs_put_page(apage, 0);
else if (err == LOCKED_PAGE)
f2fs_put_page(apage, 1);
f2fs_put_page(apage, err ? 1 : 0);
}
struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
......@@ -1057,10 +1060,12 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
return ERR_PTR(-ENOMEM);
err = read_node_page(page, READ_SYNC);
if (err < 0)
if (err < 0) {
f2fs_put_page(page, 1);
return ERR_PTR(err);
else if (err != LOCKED_PAGE)
} else if (err != LOCKED_PAGE) {
lock_page(page);
}
if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) {
ClearPageUptodate(page);
......@@ -1096,10 +1101,12 @@ struct page *get_node_page_ra(struct page *parent, int start)
return ERR_PTR(-ENOMEM);
err = read_node_page(page, READ_SYNC);
if (err < 0)
if (err < 0) {
f2fs_put_page(page, 1);
return ERR_PTR(err);
else if (err == LOCKED_PAGE)
} else if (err == LOCKED_PAGE) {
goto page_hit;
}
blk_start_plug(&plug);
......@@ -1533,7 +1540,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
if (unlikely(nid >= nm_i->max_nid))
nid = 0;
if (i++ == FREE_NID_PAGES)
if (++i >= FREE_NID_PAGES)
break;
}
......@@ -1570,6 +1577,8 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
/* We should not use stale free nids created by build_free_nids */
if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
struct node_info ni;
f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
list_for_each_entry(i, &nm_i->free_nid_list, list)
if (i->state == NID_NEW)
......@@ -1580,6 +1589,13 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
i->state = NID_ALLOC;
nm_i->fcnt--;
spin_unlock(&nm_i->free_nid_list_lock);
/* check nid is allocated already */
get_node_info(sbi, *nid, &ni);
if (ni.blk_addr != NULL_ADDR) {
alloc_nid_done(sbi, *nid);
goto retry;
}
return true;
}
spin_unlock(&nm_i->free_nid_list_lock);
......@@ -1636,6 +1652,32 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
kmem_cache_free(free_nid_slab, i);
}
int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i, *next;
int nr = nr_shrink;
if (!mutex_trylock(&nm_i->build_lock))
return 0;
spin_lock(&nm_i->free_nid_list_lock);
list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
if (nr_shrink <= 0 || nm_i->fcnt <= NAT_ENTRY_PER_BLOCK)
break;
if (i->state == NID_ALLOC)
continue;
__del_from_free_nid_list(nm_i, i);
kmem_cache_free(free_nid_slab, i);
nm_i->fcnt--;
nr_shrink--;
}
spin_unlock(&nm_i->free_nid_list_lock);
mutex_unlock(&nm_i->build_lock);
return nr - nr_shrink;
}
void recover_inline_xattr(struct inode *inode, struct page *page)
{
void *src_addr, *dst_addr;
......
......@@ -399,14 +399,35 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
for (; start < end; start++) {
for (; start < end; start++, dn.ofs_in_node++) {
block_t src, dest;
src = datablock_addr(dn.node_page, dn.ofs_in_node);
dest = datablock_addr(page, dn.ofs_in_node);
if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR &&
is_valid_blkaddr(sbi, dest, META_POR)) {
/* skip recovering if dest is the same as src */
if (src == dest)
continue;
/* dest is invalid, just invalidate src block */
if (dest == NULL_ADDR) {
truncate_data_blocks_range(&dn, 1);
continue;
}
/*
* dest is reserved block, invalidate src block
* and then reserve one new block in dnode page.
*/
if (dest == NEW_ADDR) {
truncate_data_blocks_range(&dn, 1);
err = reserve_new_block(&dn);
f2fs_bug_on(sbi, err);
continue;
}
/* dest is valid block, try to recover from src to dest */
if (is_valid_blkaddr(sbi, dest, META_POR)) {
if (src == NULL_ADDR) {
err = reserve_new_block(&dn);
......@@ -424,7 +445,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
ni.version, false);
recovered++;
}
dn.ofs_in_node++;
}
if (IS_INODE(dn.node_page))
......@@ -525,14 +545,12 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&inode_list);
/* step #1: find fsynced inode numbers */
set_sbi_flag(sbi, SBI_POR_DOING);
/* prevent checkpoint */
mutex_lock(&sbi->cp_mutex);
blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
/* step #1: find fsynced inode numbers */
err = find_fsync_dnodes(sbi, &inode_list);
if (err)
goto out;
......@@ -561,11 +579,20 @@ int recover_fsync_data(struct f2fs_sb_info *sbi)
clear_sbi_flag(sbi, SBI_POR_DOING);
if (err) {
discard_next_dnode(sbi, blkaddr);
bool invalidate = false;
if (discard_next_dnode(sbi, blkaddr))
invalidate = true;
/* Flush all the NAT/SIT pages */
while (get_pages(sbi, F2FS_DIRTY_META))
sync_meta_pages(sbi, META, LONG_MAX);
/* invalidate temporary meta page */
if (invalidate)
invalidate_mapping_pages(META_MAPPING(sbi),
blkaddr, blkaddr);
set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
mutex_unlock(&sbi->cp_mutex);
} else if (need_writecp) {
......
......@@ -197,28 +197,20 @@ void register_inmem_page(struct inode *inode, struct page *page)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
struct inmem_pages *new;
int err;
SetPagePrivate(page);
f2fs_trace_pid(page);
set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
SetPagePrivate(page);
new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
/* add atomic page indices to the list */
new->page = page;
INIT_LIST_HEAD(&new->list);
retry:
/* increase reference count with clean state */
mutex_lock(&fi->inmem_lock);
err = radix_tree_insert(&fi->inmem_root, page->index, new);
if (err == -EEXIST) {
mutex_unlock(&fi->inmem_lock);
kmem_cache_free(inmem_entry_slab, new);
return;
} else if (err) {
mutex_unlock(&fi->inmem_lock);
goto retry;
}
get_page(page);
list_add_tail(&new->list, &fi->inmem_pages);
inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
......@@ -227,7 +219,7 @@ void register_inmem_page(struct inode *inode, struct page *page)
trace_f2fs_register_inmem_page(page, INMEM);
}
void commit_inmem_pages(struct inode *inode, bool abort)
int commit_inmem_pages(struct inode *inode, bool abort)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
......@@ -239,6 +231,7 @@ void commit_inmem_pages(struct inode *inode, bool abort)
.rw = WRITE_SYNC | REQ_PRIO,
.encrypted_page = NULL,
};
int err = 0;
/*
* The abort is true only when f2fs_evict_inode is called.
......@@ -254,8 +247,8 @@ void commit_inmem_pages(struct inode *inode, bool abort)
mutex_lock(&fi->inmem_lock);
list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
if (!abort) {
lock_page(cur->page);
if (!abort) {
if (cur->page->mapping == inode->i_mapping) {
set_page_dirty(cur->page);
f2fs_wait_on_page_writeback(cur->page, DATA);
......@@ -263,15 +256,20 @@ void commit_inmem_pages(struct inode *inode, bool abort)
inode_dec_dirty_pages(inode);
trace_f2fs_commit_inmem_page(cur->page, INMEM);
fio.page = cur->page;
do_write_data_page(&fio);
err = do_write_data_page(&fio);
submit_bio = true;
if (err) {
unlock_page(cur->page);
break;
}
}
f2fs_put_page(cur->page, 1);
} else {
trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
put_page(cur->page);
}
radix_tree_delete(&fi->inmem_root, cur->page->index);
set_page_private(cur->page, 0);
ClearPagePrivate(cur->page);
f2fs_put_page(cur->page, 1);
list_del(&cur->list);
kmem_cache_free(inmem_entry_slab, cur);
dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
......@@ -283,6 +281,7 @@ void commit_inmem_pages(struct inode *inode, bool abort)
if (submit_bio)
f2fs_submit_merged_bio(sbi, DATA, WRITE);
}
return err;
}
/*
......@@ -304,10 +303,18 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi)
void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{
/* try to shrink extent cache when there is no enough memory */
if (!available_free_memory(sbi, EXTENT_CACHE))
f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
/* check the # of cached NAT entries and prefree segments */
if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
/* check the # of cached NAT entries */
if (!available_free_memory(sbi, NAT_ENTRIES))
try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
if (!available_free_memory(sbi, FREE_NIDS))
try_to_free_nids(sbi, NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES);
/* checkpoint is the only way to shrink partial cached entries */
if (!available_free_memory(sbi, NAT_ENTRIES) ||
excess_prefree_segs(sbi) ||
!available_free_memory(sbi, INO_ENTRIES))
f2fs_sync_fs(sbi->sb, true);
......@@ -323,10 +330,12 @@ static int issue_flush_thread(void *data)
return 0;
if (!llist_empty(&fcc->issue_list)) {
struct bio *bio = bio_alloc(GFP_NOIO, 0);
struct bio *bio;
struct flush_cmd *cmd, *next;
int ret;
bio = f2fs_bio_alloc(0);
fcc->dispatch_list = llist_del_all(&fcc->issue_list);
fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
......@@ -358,8 +367,15 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi)
if (test_opt(sbi, NOBARRIER))
return 0;
if (!test_opt(sbi, FLUSH_MERGE))
return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);
if (!test_opt(sbi, FLUSH_MERGE)) {
struct bio *bio = f2fs_bio_alloc(0);
int ret;
bio->bi_bdev = sbi->sb->s_bdev;
ret = submit_bio_wait(WRITE_FLUSH, bio);
bio_put(bio);
return ret;
}
init_completion(&cmd.wait);
......@@ -503,7 +519,7 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
}
void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
{
int err = -ENOTSUPP;
......@@ -513,13 +529,16 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
if (f2fs_test_bit(offset, se->discard_map))
return;
return false;
err = f2fs_issue_discard(sbi, blkaddr, 1);
}
if (err)
if (err) {
update_meta_page(sbi, NULL, blkaddr);
return true;
}
return false;
}
static void __add_discard_entry(struct f2fs_sb_info *sbi,
......@@ -1218,7 +1237,8 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
mutex_lock(&sit_i->sentry_lock);
/* direct_io'ed data is aligned to the segment for better performance */
if (direct_io && curseg->next_blkoff)
if (direct_io && curseg->next_blkoff &&
!has_not_enough_free_secs(sbi, 0))
__allocate_new_segments(sbi, type);
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
......@@ -1733,7 +1753,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
static struct sit_entry_set *grab_sit_entry_set(void)
{
struct sit_entry_set *ses =
f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC);
f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
ses->entry_cnt = 0;
INIT_LIST_HEAD(&ses->set_list);
......
......@@ -177,6 +177,15 @@ struct segment_allocation {
void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
};
/*
* this value is set in page as a private data which indicate that
* the page is atomically written, and it is in inmem_pages list.
*/
#define ATOMIC_WRITTEN_PAGE 0x0000ffff
#define IS_ATOMIC_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
struct inmem_pages {
struct list_head list;
struct page *page;
......@@ -555,16 +564,15 @@ static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type)
return curseg->next_blkoff;
}
#ifdef CONFIG_F2FS_CHECK_FS
static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
{
BUG_ON(segno > TOTAL_SEGS(sbi) - 1);
f2fs_bug_on(sbi, segno > TOTAL_SEGS(sbi) - 1);
}
static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
{
BUG_ON(blk_addr < SEG0_BLKADDR(sbi));
BUG_ON(blk_addr >= MAX_BLKADDR(sbi));
f2fs_bug_on(sbi, blk_addr < SEG0_BLKADDR(sbi)
|| blk_addr >= MAX_BLKADDR(sbi));
}
/*
......@@ -573,16 +581,11 @@ static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
static inline void check_block_count(struct f2fs_sb_info *sbi,
int segno, struct f2fs_sit_entry *raw_sit)
{
#ifdef CONFIG_F2FS_CHECK_FS
bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
int valid_blocks = 0;
int cur_pos = 0, next_pos;
/* check segment usage */
BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg);
/* check boundary of a given segment number */
BUG_ON(segno > TOTAL_SEGS(sbi) - 1);
/* check bitmap with valid block count */
do {
if (is_valid) {
......@@ -598,35 +601,11 @@ static inline void check_block_count(struct f2fs_sb_info *sbi,
is_valid = !is_valid;
} while (cur_pos < sbi->blocks_per_seg);
BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks);
}
#else
static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno)
{
if (segno > TOTAL_SEGS(sbi) - 1)
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr)
{
if (blk_addr < SEG0_BLKADDR(sbi) || blk_addr >= MAX_BLKADDR(sbi))
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
/*
* Summary block is always treated as an invalid block
*/
static inline void check_block_count(struct f2fs_sb_info *sbi,
int segno, struct f2fs_sit_entry *raw_sit)
{
/* check segment usage */
if (GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg)
set_sbi_flag(sbi, SBI_NEED_FSCK);
/* check boundary of a given segment number */
if (segno > TOTAL_SEGS(sbi) - 1)
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
#endif
/* check segment usage, and check boundary of a given segment number */
f2fs_bug_on(sbi, GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
|| segno > TOTAL_SEGS(sbi) - 1);
}
static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,
unsigned int start)
......
/*
* f2fs shrinker support
* the basic infra was copied from fs/ubifs/shrinker.c
*
* Copyright (c) 2015 Motorola Mobility
* Copyright (c) 2015 Jaegeuk Kim <jaegeuk@kernel.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
static LIST_HEAD(f2fs_list);
static DEFINE_SPINLOCK(f2fs_list_lock);
static unsigned int shrinker_run_no;
static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
{
return NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt;
}
static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
{
if (NM_I(sbi)->fcnt > NAT_ENTRY_PER_BLOCK)
return NM_I(sbi)->fcnt - NAT_ENTRY_PER_BLOCK;
return 0;
}
static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
{
return sbi->total_ext_tree + atomic_read(&sbi->total_ext_node);
}
unsigned long f2fs_shrink_count(struct shrinker *shrink,
struct shrink_control *sc)
{
struct f2fs_sb_info *sbi;
struct list_head *p;
unsigned long count = 0;
spin_lock(&f2fs_list_lock);
p = f2fs_list.next;
while (p != &f2fs_list) {
sbi = list_entry(p, struct f2fs_sb_info, s_list);
/* stop f2fs_put_super */
if (!mutex_trylock(&sbi->umount_mutex)) {
p = p->next;
continue;
}
spin_unlock(&f2fs_list_lock);
/* count extent cache entries */
count += __count_extent_cache(sbi);
/* shrink clean nat cache entries */
count += __count_nat_entries(sbi);
/* count free nids cache entries */
count += __count_free_nids(sbi);
spin_lock(&f2fs_list_lock);
p = p->next;
mutex_unlock(&sbi->umount_mutex);
}
spin_unlock(&f2fs_list_lock);
return count;
}
unsigned long f2fs_shrink_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
unsigned long nr = sc->nr_to_scan;
struct f2fs_sb_info *sbi;
struct list_head *p;
unsigned int run_no;
unsigned long freed = 0;
spin_lock(&f2fs_list_lock);
do {
run_no = ++shrinker_run_no;
} while (run_no == 0);
p = f2fs_list.next;
while (p != &f2fs_list) {
sbi = list_entry(p, struct f2fs_sb_info, s_list);
if (sbi->shrinker_run_no == run_no)
break;
/* stop f2fs_put_super */
if (!mutex_trylock(&sbi->umount_mutex)) {
p = p->next;
continue;
}
spin_unlock(&f2fs_list_lock);
sbi->shrinker_run_no = run_no;
/* shrink extent cache entries */
freed += f2fs_shrink_extent_tree(sbi, nr >> 1);
/* shrink clean nat cache entries */
if (freed < nr)
freed += try_to_free_nats(sbi, nr - freed);
/* shrink free nids cache entries */
if (freed < nr)
freed += try_to_free_nids(sbi, nr - freed);
spin_lock(&f2fs_list_lock);
p = p->next;
list_move_tail(&sbi->s_list, &f2fs_list);
mutex_unlock(&sbi->umount_mutex);
if (freed >= nr)
break;
}
spin_unlock(&f2fs_list_lock);
return freed;
}
void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
{
spin_lock(&f2fs_list_lock);
list_add_tail(&sbi->s_list, &f2fs_list);
spin_unlock(&f2fs_list_lock);
}
void f2fs_leave_shrinker(struct f2fs_sb_info *sbi)
{
f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi));
spin_lock(&f2fs_list_lock);
list_del(&sbi->s_list);
spin_unlock(&f2fs_list_lock);
}
......@@ -39,6 +39,13 @@ static struct proc_dir_entry *f2fs_proc_root;
static struct kmem_cache *f2fs_inode_cachep;
static struct kset *f2fs_kset;
/* f2fs-wide shrinker description */
static struct shrinker f2fs_shrinker_info = {
.scan_objects = f2fs_shrink_scan,
.count_objects = f2fs_shrink_count,
.seeks = DEFAULT_SEEKS,
};
enum {
Opt_gc_background,
Opt_disable_roll_forward,
......@@ -58,6 +65,7 @@ enum {
Opt_nobarrier,
Opt_fastboot,
Opt_extent_cache,
Opt_noextent_cache,
Opt_noinline_data,
Opt_err,
};
......@@ -81,6 +89,7 @@ static match_table_t f2fs_tokens = {
{Opt_nobarrier, "nobarrier"},
{Opt_fastboot, "fastboot"},
{Opt_extent_cache, "extent_cache"},
{Opt_noextent_cache, "noextent_cache"},
{Opt_noinline_data, "noinline_data"},
{Opt_err, NULL},
};
......@@ -382,6 +391,9 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_extent_cache:
set_opt(sbi, EXTENT_CACHE);
break;
case Opt_noextent_cache:
clear_opt(sbi, EXTENT_CACHE);
break;
case Opt_noinline_data:
clear_opt(sbi, INLINE_DATA);
break;
......@@ -410,9 +422,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
atomic_set(&fi->dirty_pages, 0);
fi->i_current_depth = 1;
fi->i_advise = 0;
rwlock_init(&fi->ext_lock);
init_rwsem(&fi->i_sem);
INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS);
INIT_LIST_HEAD(&fi->inmem_pages);
mutex_init(&fi->inmem_lock);
......@@ -441,17 +451,22 @@ static int f2fs_drop_inode(struct inode *inode)
*/
if (!inode_unhashed(inode) && inode->i_state & I_SYNC) {
if (!inode->i_nlink && !is_bad_inode(inode)) {
/* to avoid evict_inode call simultaneously */
atomic_inc(&inode->i_count);
spin_unlock(&inode->i_lock);
/* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode))
commit_inmem_pages(inode, true);
/* should remain fi->extent_tree for writepage */
f2fs_destroy_extent_node(inode);
sb_start_intwrite(inode->i_sb);
i_size_write(inode, 0);
if (F2FS_HAS_BLOCKS(inode))
f2fs_truncate(inode);
f2fs_truncate(inode, true);
sb_end_intwrite(inode->i_sb);
......@@ -461,6 +476,7 @@ static int f2fs_drop_inode(struct inode *inode)
F2FS_I(inode)->i_crypt_info);
#endif
spin_lock(&inode->i_lock);
atomic_dec(&inode->i_count);
}
return 0;
}
......@@ -498,9 +514,11 @@ static void f2fs_put_super(struct super_block *sb)
}
kobject_del(&sbi->s_kobj);
f2fs_destroy_stats(sbi);
stop_gc_thread(sbi);
/* prevent remaining shrinker jobs */
mutex_lock(&sbi->umount_mutex);
/*
* We don't need to do checkpoint when superblock is clean.
* But, the previous checkpoint was not done by umount, it needs to do
......@@ -514,6 +532,9 @@ static void f2fs_put_super(struct super_block *sb)
write_checkpoint(sbi, &cpc);
}
/* write_checkpoint can update stat informaion */
f2fs_destroy_stats(sbi);
/*
* normally superblock is clean, so we need to release this.
* In addition, EIO will skip do checkpoint, we need this as well.
......@@ -521,6 +542,9 @@ static void f2fs_put_super(struct super_block *sb)
release_dirty_inode(sbi);
release_discard_addrs(sbi);
f2fs_leave_shrinker(sbi);
mutex_unlock(&sbi->umount_mutex);
iput(sbi->node_inode);
iput(sbi->meta_inode);
......@@ -647,6 +671,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",fastboot");
if (test_opt(sbi, EXTENT_CACHE))
seq_puts(seq, ",extent_cache");
else
seq_puts(seq, ",noextent_cache");
seq_printf(seq, ",active_logs=%u", sbi->active_logs);
return 0;
......@@ -667,7 +693,7 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset)
struct seg_entry *se = get_seg_entry(sbi, i);
if ((i % 10) == 0)
seq_printf(seq, "%-5d", i);
seq_printf(seq, "%-10d", i);
seq_printf(seq, "%d|%-3u", se->type,
get_valid_blocks(sbi, i, 1));
if ((i % 10) == 9 || i == (total_segs - 1))
......@@ -699,6 +725,7 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, BG_GC);
set_opt(sbi, INLINE_DATA);
set_opt(sbi, EXTENT_CACHE);
#ifdef CONFIG_F2FS_FS_XATTR
set_opt(sbi, XATTR_USER);
......@@ -970,6 +997,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->dir_level = DEF_DIR_LEVEL;
clear_sbi_flag(sbi, SBI_NEED_FSCK);
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
}
/*
......@@ -1135,7 +1165,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
init_rwsem(&sbi->node_write);
clear_sbi_flag(sbi, SBI_POR_DOING);
/* disallow all the data/node/meta page writes */
set_sbi_flag(sbi, SBI_POR_DOING);
spin_lock_init(&sbi->stat_lock);
init_rwsem(&sbi->read_io.io_rwsem);
......@@ -1212,8 +1244,12 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_nm;
}
f2fs_join_shrinker(sbi);
/* if there are nt orphan nodes free them */
recover_orphan_inodes(sbi);
err = recover_orphan_inodes(sbi);
if (err)
goto free_node_inode;
/* read root inode and dentry */
root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
......@@ -1275,6 +1311,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_kobj;
}
}
/* recover_fsync_data() cleared this already */
clear_sbi_flag(sbi, SBI_POR_DOING);
/*
* If filesystem is not mounted as read-only then
......@@ -1308,7 +1346,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
dput(sb->s_root);
sb->s_root = NULL;
free_node_inode:
mutex_lock(&sbi->umount_mutex);
f2fs_leave_shrinker(sbi);
iput(sbi->node_inode);
mutex_unlock(&sbi->umount_mutex);
free_nm:
destroy_node_manager(sbi);
free_sm:
......@@ -1404,13 +1445,20 @@ static int __init init_f2fs_fs(void)
err = f2fs_init_crypto();
if (err)
goto free_kset;
err = register_filesystem(&f2fs_fs_type);
err = register_shrinker(&f2fs_shrinker_info);
if (err)
goto free_crypto;
err = register_filesystem(&f2fs_fs_type);
if (err)
goto free_shrinker;
f2fs_create_root_stats();
f2fs_proc_root = proc_mkdir("fs/f2fs", NULL);
return 0;
free_shrinker:
unregister_shrinker(&f2fs_shrinker_info);
free_crypto:
f2fs_exit_crypto();
free_kset:
......@@ -1433,6 +1481,7 @@ static void __exit exit_f2fs_fs(void)
{
remove_proc_entry("fs/f2fs", NULL);
f2fs_destroy_root_stats();
unregister_shrinker(&f2fs_shrinker_info);
unregister_filesystem(&f2fs_fs_type);
f2fs_exit_crypto();
destroy_extent_cache();
......
......@@ -499,9 +499,12 @@ static int __f2fs_setxattr(struct inode *inode, int index,
len = strlen(name);
if (len > F2FS_NAME_LEN || size > MAX_VALUE_LEN(inode))
if (len > F2FS_NAME_LEN)
return -ERANGE;
if (size > MAX_VALUE_LEN(inode))
return -E2BIG;
base_addr = read_all_xattrs(inode, ipage);
if (!base_addr)
goto exit;
......
......@@ -417,15 +417,25 @@ typedef __le32 f2fs_hash_t;
#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS)
/* the number of dentry in a block */
#define NR_DENTRY_IN_BLOCK 214
/* MAX level for dir lookup */
#define MAX_DIR_HASH_DEPTH 63
/* MAX buckets in one level of dir */
#define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1))
/*
* space utilization of regular dentry and inline dentry
* regular dentry inline dentry
* bitmap 1 * 27 = 27 1 * 23 = 23
* reserved 1 * 3 = 3 1 * 7 = 7
* dentry 11 * 214 = 2354 11 * 182 = 2002
* filename 8 * 214 = 1712 8 * 182 = 1456
* total 4096 3488
*
* Note: there are more reserved space in inline dentry than in regular
* dentry, when converting inline dentry we should handle this carefully.
*/
#define NR_DENTRY_IN_BLOCK 214 /* the number of dentry in a block */
#define SIZE_OF_DIR_ENTRY 11 /* by byte */
#define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \
BITS_PER_BYTE)
......
......@@ -1099,11 +1099,11 @@ TRACE_EVENT(f2fs_lookup_extent_tree_start,
TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end,
TP_PROTO(struct inode *inode, unsigned int pgofs,
struct extent_node *en),
struct extent_info *ei),
TP_ARGS(inode, pgofs, en),
TP_ARGS(inode, pgofs, ei),
TP_CONDITION(en),
TP_CONDITION(ei),
TP_STRUCT__entry(
__field(dev_t, dev)
......@@ -1118,9 +1118,9 @@ TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end,
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->pgofs = pgofs;
__entry->fofs = en->ei.fofs;
__entry->blk = en->ei.blk;
__entry->len = en->ei.len;
__entry->fofs = ei->fofs;
__entry->blk = ei->blk;
__entry->len = ei->len;
),
TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, "
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment