Commit 8f45533e authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'f2fs-for-5.5' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "In this round, we've introduced fairly small number of patches as below.

  Enhancements:
   - improve the in-place-update IO flow
   - allocate segment to guarantee no GC for pinned files

  Bug fixes:
   - fix updatetime in lazytime mode
   - potential memory leak in f2fs_listxattr
   - record parent inode number in rename2 correctly
   - fix deadlock in f2fs_gc along with atomic writes
   - avoid needless data migration in GC"

* tag 'f2fs-for-5.5' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs:
  f2fs: stop GC when the victim becomes fully valid
  f2fs: expose main_blkaddr in sysfs
  f2fs: choose hardlimit when softlimit is larger than hardlimit in f2fs_statfs_project()
  f2fs: Fix deadlock in f2fs_gc() context during atomic files handling
  f2fs: show f2fs instance in printk_ratelimited
  f2fs: fix potential overflow
  f2fs: fix to update dir's i_pino during cross_rename
  f2fs: support aligned pinned file
  f2fs: avoid kernel panic on corruption test
  f2fs: fix wrong description in document
  f2fs: cache global IPU bio
  f2fs: fix to avoid memory leakage in f2fs_listxattr
  f2fs: check total_segments from devices in raw_super
  f2fs: update multi-dev metadata in resize_fs
  f2fs: mark recovery flag correctly in read_raw_super_block()
  f2fs: fix to update time in lazytime mode
parents 4a55d362 803e74be
......@@ -31,6 +31,12 @@ Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
Description:
Controls the issue rate of segment discard commands.
What: /sys/fs/f2fs/<disk>/max_blkaddr
Date: November 2019
Contact: "Ramon Pantin" <pantin@google.com>
Description:
Shows first block address of MAIN area.
What: /sys/fs/f2fs/<disk>/ipu_policy
Date: November 2013
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
......
......@@ -297,6 +297,9 @@ Files in /sys/fs/f2fs/<devname>
reclaim the prefree segments to free segments.
By default, 5% over total # of segments.
main_blkaddr This value gives the first block address of
MAIN area in the partition.
max_small_discards This parameter controls the number of discard
commands that consist small blocks less than 2MB.
The candidates to be discarded are cached until
......@@ -346,7 +349,7 @@ Files in /sys/fs/f2fs/<devname>
ram_thresh This parameter controls the memory footprint used
by free nids and cached nat entries. By default,
10 is set, which indicates 10 MB / 1 GB RAM.
1 is set, which indicates 10 MB / 1 GB RAM.
ra_nid_pages When building free nids, F2FS reads NAT blocks
ahead for speed up. Default is 0.
......
......@@ -581,7 +581,7 @@ int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi)
if (time_to_inject(sbi, FAULT_ORPHAN)) {
spin_unlock(&im->ino_lock);
f2fs_show_injection_info(FAULT_ORPHAN);
f2fs_show_injection_info(sbi, FAULT_ORPHAN);
return -ENOSPC;
}
......
......@@ -29,6 +29,7 @@
#define NUM_PREALLOC_POST_READ_CTXS 128
static struct kmem_cache *bio_post_read_ctx_cache;
static struct kmem_cache *bio_entry_slab;
static mempool_t *bio_post_read_ctx_pool;
static bool __is_cp_guaranteed(struct page *page)
......@@ -167,9 +168,10 @@ static bool f2fs_bio_post_read_required(struct bio *bio)
static void f2fs_read_end_io(struct bio *bio)
{
if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)),
FAULT_READ_IO)) {
f2fs_show_injection_info(FAULT_READ_IO);
struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
if (time_to_inject(sbi, FAULT_READ_IO)) {
f2fs_show_injection_info(sbi, FAULT_READ_IO);
bio->bi_status = BLK_STS_IOERR;
}
......@@ -191,7 +193,7 @@ static void f2fs_write_end_io(struct bio *bio)
struct bvec_iter_all iter_all;
if (time_to_inject(sbi, FAULT_WRITE_IO)) {
f2fs_show_injection_info(FAULT_WRITE_IO);
f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
bio->bi_status = BLK_STS_IOERR;
}
......@@ -543,6 +545,126 @@ static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
return io_type_is_mergeable(io, fio);
}
static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
struct page *page, enum temp_type temp)
{
struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
struct bio_entry *be;
be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
be->bio = bio;
bio_get(bio);
if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
f2fs_bug_on(sbi, 1);
down_write(&io->bio_list_lock);
list_add_tail(&be->list, &io->bio_list);
up_write(&io->bio_list_lock);
}
static void del_bio_entry(struct bio_entry *be)
{
list_del(&be->list);
kmem_cache_free(bio_entry_slab, be);
}
static int add_ipu_page(struct f2fs_sb_info *sbi, struct bio **bio,
struct page *page)
{
enum temp_type temp;
bool found = false;
int ret = -EAGAIN;
for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
struct list_head *head = &io->bio_list;
struct bio_entry *be;
down_write(&io->bio_list_lock);
list_for_each_entry(be, head, list) {
if (be->bio != *bio)
continue;
found = true;
if (bio_add_page(*bio, page, PAGE_SIZE, 0) == PAGE_SIZE) {
ret = 0;
break;
}
/* bio is full */
del_bio_entry(be);
__submit_bio(sbi, *bio, DATA);
break;
}
up_write(&io->bio_list_lock);
}
if (ret) {
bio_put(*bio);
*bio = NULL;
}
return ret;
}
void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
struct bio **bio, struct page *page)
{
enum temp_type temp;
bool found = false;
struct bio *target = bio ? *bio : NULL;
for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
struct list_head *head = &io->bio_list;
struct bio_entry *be;
if (list_empty(head))
continue;
down_read(&io->bio_list_lock);
list_for_each_entry(be, head, list) {
if (target)
found = (target == be->bio);
else
found = __has_merged_page(be->bio, NULL,
page, 0);
if (found)
break;
}
up_read(&io->bio_list_lock);
if (!found)
continue;
found = false;
down_write(&io->bio_list_lock);
list_for_each_entry(be, head, list) {
if (target)
found = (target == be->bio);
else
found = __has_merged_page(be->bio, NULL,
page, 0);
if (found) {
target = be->bio;
del_bio_entry(be);
break;
}
}
up_write(&io->bio_list_lock);
}
if (found)
__submit_bio(sbi, target, DATA);
if (bio && *bio) {
bio_put(*bio);
*bio = NULL;
}
}
int f2fs_merge_page_bio(struct f2fs_io_info *fio)
{
struct bio *bio = *fio->bio;
......@@ -557,19 +679,16 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
f2fs_trace_ios(fio, 0);
if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
fio->new_blkaddr)) {
__submit_bio(fio->sbi, bio, fio->type);
bio = NULL;
}
fio->new_blkaddr))
f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
alloc_new:
if (!bio) {
bio = __bio_alloc(fio, BIO_MAX_PAGES);
bio_set_op_attrs(bio, fio->op, fio->op_flags);
}
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
__submit_bio(fio->sbi, bio, fio->type);
bio = NULL;
add_bio_entry(fio->sbi, bio, page, fio->temp);
} else {
if (add_ipu_page(fio->sbi, &bio, page))
goto alloc_new;
}
......@@ -584,19 +703,6 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
return 0;
}
static void f2fs_submit_ipu_bio(struct f2fs_sb_info *sbi, struct bio **bio,
struct page *page)
{
if (!bio)
return;
if (!__has_merged_page(*bio, NULL, page, 0))
return;
__submit_bio(sbi, *bio, DATA);
*bio = NULL;
}
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
......@@ -2098,7 +2204,7 @@ static int __write_data_page(struct page *page, bool *submitted,
loff_t i_size = i_size_read(inode);
const pgoff_t end_index = ((unsigned long long) i_size)
>> PAGE_SHIFT;
loff_t psize = (page->index + 1) << PAGE_SHIFT;
loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
unsigned offset = 0;
bool need_balance_fs = false;
int err = 0;
......@@ -2215,14 +2321,12 @@ static int __write_data_page(struct page *page, bool *submitted,
unlock_page(page);
if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
!F2FS_I(inode)->cp_task) {
f2fs_submit_ipu_bio(sbi, bio, page);
!F2FS_I(inode)->cp_task)
f2fs_balance_fs(sbi, need_balance_fs);
}
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_submit_ipu_bio(sbi, bio, page);
f2fs_submit_merged_write(sbi, DATA);
f2fs_submit_merged_ipu_write(sbi, bio, NULL);
submitted = NULL;
}
......@@ -2342,14 +2446,12 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
}
if (PageWriteback(page)) {
if (wbc->sync_mode != WB_SYNC_NONE) {
if (wbc->sync_mode != WB_SYNC_NONE)
f2fs_wait_on_page_writeback(page,
DATA, true, true);
f2fs_submit_ipu_bio(sbi, &bio, page);
} else {
else
goto continue_unlock;
}
}
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
......@@ -2406,7 +2508,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
NULL, 0, DATA);
/* submit cached bio of IPU write */
if (bio)
__submit_bio(sbi, bio, DATA);
f2fs_submit_merged_ipu_write(sbi, &bio, NULL);
return ret;
}
......@@ -3211,8 +3313,22 @@ int __init f2fs_init_post_read_processing(void)
return -ENOMEM;
}
void __exit f2fs_destroy_post_read_processing(void)
void f2fs_destroy_post_read_processing(void)
{
mempool_destroy(bio_post_read_ctx_pool);
kmem_cache_destroy(bio_post_read_ctx_cache);
}
int __init f2fs_init_bio_entry_cache(void)
{
bio_entry_slab = f2fs_kmem_cache_create("bio_entry_slab",
sizeof(struct bio_entry));
if (!bio_entry_slab)
return -ENOMEM;
return 0;
}
void __exit f2fs_destroy_bio_entry_cache(void)
{
kmem_cache_destroy(bio_entry_slab);
}
......@@ -628,7 +628,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,
start:
if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) {
f2fs_show_injection_info(FAULT_DIR_DEPTH);
f2fs_show_injection_info(F2FS_I_SB(dir), FAULT_DIR_DEPTH);
return -ENOSPC;
}
......@@ -919,8 +919,9 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
bit_pos++;
ctx->pos = start_pos + bit_pos;
printk_ratelimited(
"%s, invalid namelen(0), ino:%u, run fsck to fix.",
KERN_WARNING, le32_to_cpu(de->ino));
"%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.",
KERN_WARNING, sbi->sb->s_id,
le32_to_cpu(de->ino));
set_sbi_flag(sbi, SBI_NEED_FSCK);
continue;
}
......
......@@ -890,6 +890,7 @@ enum {
CURSEG_WARM_NODE, /* direct node blocks of normal files */
CURSEG_COLD_NODE, /* indirect node blocks */
NO_CHECK_TYPE,
CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */
};
struct flush_cmd {
......@@ -1068,6 +1069,11 @@ struct f2fs_io_info {
unsigned char version; /* version of the node */
};
struct bio_entry {
struct bio *bio;
struct list_head list;
};
#define is_read_io(rw) ((rw) == READ)
struct f2fs_bio_info {
struct f2fs_sb_info *sbi; /* f2fs superblock */
......@@ -1077,6 +1083,8 @@ struct f2fs_bio_info {
struct rw_semaphore io_rwsem; /* blocking op for bio */
spinlock_t io_lock; /* serialize DATA/NODE IOs */
struct list_head io_list; /* track fios */
struct list_head bio_list; /* bio entry list head */
struct rw_semaphore bio_list_lock; /* lock to protect bio entry list */
};
#define FDEV(i) (sbi->devs[i])
......@@ -1289,11 +1297,13 @@ struct f2fs_sb_info {
unsigned int gc_mode; /* current GC state */
unsigned int next_victim_seg[2]; /* next segment in victim section */
/* for skip statistic */
unsigned int atomic_files; /* # of opened atomic file */
unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */
unsigned long long skipped_gc_rwsem; /* FG_GC only */
/* threshold for gc trials on pinned files */
u64 gc_pin_file_threshold;
struct rw_semaphore pin_sem;
/* maximum # of trials to find a victim segment for SSR and GC */
unsigned int max_victim_search;
......@@ -1365,9 +1375,10 @@ struct f2fs_private_dio {
};
#ifdef CONFIG_F2FS_FAULT_INJECTION
#define f2fs_show_injection_info(type) \
printk_ratelimited("%sF2FS-fs : inject %s in %s of %pS\n", \
KERN_INFO, f2fs_fault_name[type], \
#define f2fs_show_injection_info(sbi, type) \
printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \
KERN_INFO, sbi->sb->s_id, \
f2fs_fault_name[type], \
__func__, __builtin_return_address(0))
static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
{
......@@ -1387,7 +1398,7 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
return false;
}
#else
#define f2fs_show_injection_info(type) do { } while (0)
#define f2fs_show_injection_info(sbi, type) do { } while (0)
static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type)
{
return false;
......@@ -1772,7 +1783,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
return ret;
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
f2fs_show_injection_info(sbi, FAULT_BLOCK);
release = *count;
goto release_quota;
}
......@@ -2024,7 +2035,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
}
if (time_to_inject(sbi, FAULT_BLOCK)) {
f2fs_show_injection_info(FAULT_BLOCK);
f2fs_show_injection_info(sbi, FAULT_BLOCK);
goto enospc;
}
......@@ -2139,7 +2150,8 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
return page;
if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) {
f2fs_show_injection_info(FAULT_PAGE_ALLOC);
f2fs_show_injection_info(F2FS_M_SB(mapping),
FAULT_PAGE_ALLOC);
return NULL;
}
}
......@@ -2154,7 +2166,7 @@ static inline struct page *f2fs_pagecache_get_page(
int fgp_flags, gfp_t gfp_mask)
{
if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) {
f2fs_show_injection_info(FAULT_PAGE_GET);
f2fs_show_injection_info(F2FS_M_SB(mapping), FAULT_PAGE_GET);
return NULL;
}
......@@ -2223,7 +2235,7 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi,
return bio;
}
if (time_to_inject(sbi, FAULT_ALLOC_BIO)) {
f2fs_show_injection_info(FAULT_ALLOC_BIO);
f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO);
return NULL;
}
......@@ -2704,6 +2716,20 @@ static inline void clear_file(struct inode *inode, int type)
f2fs_mark_inode_dirty_sync(inode, true);
}
static inline bool f2fs_is_time_consistent(struct inode *inode)
{
if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime))
return false;
if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime))
return false;
if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime))
return false;
if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 3,
&F2FS_I(inode)->i_crtime))
return false;
return true;
}
static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
{
bool ret;
......@@ -2721,14 +2747,7 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync)
i_size_read(inode) & ~PAGE_MASK)
return false;
if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime))
return false;
if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime))
return false;
if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime))
return false;
if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 3,
&F2FS_I(inode)->i_crtime))
if (!f2fs_is_time_consistent(inode))
return false;
down_read(&F2FS_I(inode)->i_sem);
......@@ -2783,7 +2802,7 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi,
void *ret;
if (time_to_inject(sbi, FAULT_KMALLOC)) {
f2fs_show_injection_info(FAULT_KMALLOC);
f2fs_show_injection_info(sbi, FAULT_KMALLOC);
return NULL;
}
......@@ -2804,7 +2823,7 @@ static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi,
size_t size, gfp_t flags)
{
if (time_to_inject(sbi, FAULT_KVMALLOC)) {
f2fs_show_injection_info(FAULT_KVMALLOC);
f2fs_show_injection_info(sbi, FAULT_KVMALLOC);
return NULL;
}
......@@ -3102,7 +3121,7 @@ void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
unsigned int start, unsigned int end);
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type);
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
struct cp_control *cpc);
......@@ -3188,10 +3207,14 @@ void f2fs_destroy_checkpoint_caches(void);
*/
int f2fs_init_post_read_processing(void);
void f2fs_destroy_post_read_processing(void);
int f2fs_init_bio_entry_cache(void);
void f2fs_destroy_bio_entry_cache(void);
void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type);
void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
struct inode *inode, struct page *page,
nid_t ino, enum page_type type);
void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
struct bio **bio, struct page *page);
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi);
int f2fs_submit_page_bio(struct f2fs_io_info *fio);
int f2fs_merge_page_bio(struct f2fs_io_info *fio);
......
......@@ -681,7 +681,7 @@ int f2fs_truncate(struct inode *inode)
trace_f2fs_truncate(inode);
if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) {
f2fs_show_injection_info(FAULT_TRUNCATE);
f2fs_show_injection_info(F2FS_I_SB(inode), FAULT_TRUNCATE);
return -EIO;
}
......@@ -1142,7 +1142,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode,
}
dn.ofs_in_node++;
i++;
new_size = (dst + i) << PAGE_SHIFT;
new_size = (loff_t)(dst + i) << PAGE_SHIFT;
if (dst_inode->i_size < new_size)
f2fs_i_size_write(dst_inode, new_size);
} while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR));
......@@ -1548,12 +1548,44 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (off_end)
map.m_len++;
if (f2fs_is_pinned_file(inode))
map.m_seg_type = CURSEG_COLD_DATA;
if (!map.m_len)
return 0;
if (f2fs_is_pinned_file(inode)) {
block_t len = (map.m_len >> sbi->log_blocks_per_seg) <<
sbi->log_blocks_per_seg;
block_t done = 0;
if (map.m_len % sbi->blocks_per_seg)
len += sbi->blocks_per_seg;
map.m_len = sbi->blocks_per_seg;
next_alloc:
if (has_not_enough_free_secs(sbi, 0,
GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
mutex_lock(&sbi->gc_mutex);
err = f2fs_gc(sbi, true, false, NULL_SEGNO);
if (err && err != -ENODATA && err != -EAGAIN)
goto out_err;
}
down_write(&sbi->pin_sem);
map.m_seg_type = CURSEG_COLD_DATA_PINNED;
f2fs_allocate_new_segments(sbi, CURSEG_COLD_DATA);
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
up_write(&sbi->pin_sem);
done += map.m_len;
len -= map.m_len;
map.m_lblk += map.m_len;
if (!err && len)
goto next_alloc;
err = f2fs_map_blocks(inode, &map, 1, (f2fs_is_pinned_file(inode) ?
F2FS_GET_BLOCK_PRE_DIO :
F2FS_GET_BLOCK_PRE_AIO));
map.m_len = done;
} else {
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
}
out_err:
if (err) {
pgoff_t last_off;
......@@ -1893,6 +1925,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (list_empty(&fi->inmem_ilist))
list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
sbi->atomic_files++;
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
/* add inode in inmem_list first and set atomic_file */
......
......@@ -54,7 +54,7 @@ static int gc_thread_func(void *data)
}
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
......@@ -1012,8 +1012,14 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t start_bidx;
nid_t nid = le32_to_cpu(entry->nid);
/* stop BG_GC if there is not enough free sections. */
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0))
/*
* stop BG_GC if there is not enough free sections.
* Or, stop GC if the segment becomes fully valid caused by
* race condition along with SSR block allocation.
*/
if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) ||
get_valid_blocks(sbi, segno, false) ==
sbi->blocks_per_seg)
return submitted;
if (check_valid_map(sbi, segno, off) == 0)
......@@ -1437,11 +1443,20 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs);
raw_sb->block_count = cpu_to_le64(block_count +
(long long)segs * sbi->blocks_per_seg);
if (f2fs_is_multi_device(sbi)) {
int last_dev = sbi->s_ndevs - 1;
int dev_segs =
le32_to_cpu(raw_sb->devs[last_dev].total_segments);
raw_sb->devs[last_dev].total_segments =
cpu_to_le32(dev_segs + segs);
}
}
static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
{
int segs = secs * sbi->segs_per_sec;
long long blks = (long long)segs * sbi->blocks_per_seg;
long long user_block_count =
le64_to_cpu(F2FS_CKPT(sbi)->user_block_count);
......@@ -1449,8 +1464,20 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs;
FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs;
FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs;
F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count +
(long long)segs * sbi->blocks_per_seg);
F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks);
if (f2fs_is_multi_device(sbi)) {
int last_dev = sbi->s_ndevs - 1;
FDEV(last_dev).total_segments =
(int)FDEV(last_dev).total_segments + segs;
FDEV(last_dev).end_blk =
(long long)FDEV(last_dev).end_blk + blks;
#ifdef CONFIG_BLK_DEV_ZONED
FDEV(last_dev).nr_blkz = (int)FDEV(last_dev).nr_blkz +
(int)(blks >> sbi->log_blocks_per_blkz);
#endif
}
}
int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
......@@ -1465,6 +1492,15 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
if (block_count > old_block_count)
return -EINVAL;
if (f2fs_is_multi_device(sbi)) {
int last_dev = sbi->s_ndevs - 1;
__u64 last_segs = FDEV(last_dev).total_segments;
if (block_count + last_segs * sbi->blocks_per_seg <=
old_block_count)
return -EINVAL;
}
/* new fs size should align to section size */
div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem);
if (rem)
......
......@@ -615,7 +615,11 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
inode->i_ino == F2FS_META_INO(sbi))
return 0;
if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
/*
* atime could be updated without dirtying f2fs inode in lazytime mode
*/
if (f2fs_is_time_consistent(inode) &&
!is_inode_flag_set(inode, FI_DIRTY_INODE))
return 0;
if (!f2fs_is_checkpoint_ready(sbi))
......@@ -677,7 +681,7 @@ void f2fs_evict_inode(struct inode *inode)
err = f2fs_truncate(inode);
if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
f2fs_show_injection_info(FAULT_EVICT_INODE);
f2fs_show_injection_info(sbi, FAULT_EVICT_INODE);
err = -EIO;
}
......
......@@ -981,7 +981,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!old_dir_entry || whiteout)
file_lost_pino(old_inode);
else
F2FS_I(old_inode)->i_pino = new_dir->i_ino;
/* adjust dir's i_pino to pass fsck check */
f2fs_i_pino_write(old_inode, new_dir->i_ino);
up_write(&F2FS_I(old_inode)->i_sem);
old_inode->i_ctime = current_time(old_inode);
......@@ -1141,7 +1142,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_set_link(old_dir, old_entry, old_page, new_inode);
down_write(&F2FS_I(old_inode)->i_sem);
if (!old_dir_entry)
file_lost_pino(old_inode);
else
/* adjust dir's i_pino to pass fsck check */
f2fs_i_pino_write(old_inode, new_dir->i_ino);
up_write(&F2FS_I(old_inode)->i_sem);
old_dir->i_ctime = current_time(old_dir);
......@@ -1156,7 +1161,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
down_write(&F2FS_I(new_inode)->i_sem);
if (!new_dir_entry)
file_lost_pino(new_inode);
else
/* adjust dir's i_pino to pass fsck check */
f2fs_i_pino_write(new_inode, old_dir->i_ino);
up_write(&F2FS_I(new_inode)->i_sem);
new_dir->i_ctime = current_time(new_dir);
......
......@@ -2349,7 +2349,6 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
if (ret) {
up_read(&nm_i->nat_tree_lock);
f2fs_bug_on(sbi, !mount);
f2fs_err(sbi, "NAT is corrupt, run fsck to fix it");
return ret;
}
......@@ -2399,7 +2398,7 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
struct free_nid *i = NULL;
retry:
if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
f2fs_show_injection_info(FAULT_ALLOC_NID);
f2fs_show_injection_info(sbi, FAULT_ALLOC_NID);
return false;
}
......
......@@ -711,7 +711,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
f2fs_put_page(page, 1);
}
if (!err)
f2fs_allocate_new_segments(sbi);
f2fs_allocate_new_segments(sbi, NO_CHECK_TYPE);
return err;
}
......
......@@ -288,6 +288,8 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
struct inode *inode;
struct f2fs_inode_info *fi;
unsigned int count = sbi->atomic_files;
unsigned int looped = 0;
next:
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (list_empty(head)) {
......@@ -296,22 +298,26 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
}
fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
inode = igrab(&fi->vfs_inode);
if (inode)
list_move_tail(&fi->inmem_ilist, head);
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
if (inode) {
if (gc_failure) {
if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
goto drop;
if (!fi->i_gc_failures[GC_FAILURE_ATOMIC])
goto skip;
}
drop:
set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
f2fs_drop_inmem_pages(inode);
skip:
iput(inode);
}
skip:
congestion_wait(BLK_RW_ASYNC, HZ/50);
cond_resched();
if (gc_failure) {
if (++looped >= count)
return;
}
goto next;
}
......@@ -327,13 +333,16 @@ void f2fs_drop_inmem_pages(struct inode *inode)
mutex_unlock(&fi->inmem_lock);
}
clear_inode_flag(inode, FI_ATOMIC_FILE);
fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
stat_dec_atomic_write(inode);
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (!list_empty(&fi->inmem_ilist))
list_del_init(&fi->inmem_ilist);
if (f2fs_is_atomic_file(inode)) {
clear_inode_flag(inode, FI_ATOMIC_FILE);
sbi->atomic_files--;
}
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
}
......@@ -480,7 +489,7 @@ int f2fs_commit_inmem_pages(struct inode *inode)
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
f2fs_show_injection_info(FAULT_CHECKPOINT);
f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
f2fs_stop_checkpoint(sbi, false);
}
......@@ -1008,8 +1017,9 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
if (dc->error)
printk_ratelimited(
"%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d",
KERN_INFO, dc->lstart, dc->start, dc->len, dc->error);
"%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d",
KERN_INFO, sbi->sb->s_id,
dc->lstart, dc->start, dc->len, dc->error);
__detach_discard_cmd(dcc, dc);
}
......@@ -1149,7 +1159,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
dc->len += len;
if (time_to_inject(sbi, FAULT_DISCARD)) {
f2fs_show_injection_info(FAULT_DISCARD);
f2fs_show_injection_info(sbi, FAULT_DISCARD);
err = -EIO;
goto submit;
}
......@@ -2691,7 +2701,7 @@ void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
up_read(&SM_I(sbi)->curseg_lock);
}
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg;
unsigned int old_segno;
......@@ -2700,11 +2710,18 @@ void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
down_write(&SIT_I(sbi)->sentry_lock);
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
if (type != NO_CHECK_TYPE && i != type)
continue;
curseg = CURSEG_I(sbi, i);
if (type == NO_CHECK_TYPE || curseg->next_blkoff ||
get_valid_blocks(sbi, curseg->segno, false) ||
get_ckpt_valid_blocks(sbi, curseg->segno)) {
old_segno = curseg->segno;
SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
locate_dirty_segment(sbi, old_segno);
}
}
up_write(&SIT_I(sbi)->sentry_lock);
}
......@@ -3069,6 +3086,19 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
bool put_pin_sem = false;
if (type == CURSEG_COLD_DATA) {
/* GC during CURSEG_COLD_DATA_PINNED allocation */
if (down_read_trylock(&sbi->pin_sem)) {
put_pin_sem = true;
} else {
type = CURSEG_WARM_DATA;
curseg = CURSEG_I(sbi, type);
}
} else if (type == CURSEG_COLD_DATA_PINNED) {
type = CURSEG_COLD_DATA;
}
down_read(&SM_I(sbi)->curseg_lock);
......@@ -3134,6 +3164,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
mutex_unlock(&curseg->curseg_mutex);
up_read(&SM_I(sbi)->curseg_lock);
if (put_pin_sem)
up_read(&sbi->pin_sem);
}
static void update_device_state(struct f2fs_io_info *fio)
......@@ -3380,7 +3413,10 @@ void f2fs_wait_on_page_writeback(struct page *page,
if (PageWriteback(page)) {
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
/* submit cached LFS IO */
f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
/* sbumit cached IPU IO */
f2fs_submit_merged_ipu_write(sbi, NULL, page);
if (ordered) {
wait_on_page_writeback(page);
f2fs_bug_on(sbi, locked && PageWriteback(page));
......
......@@ -313,6 +313,8 @@ struct sit_entry_set {
*/
static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
{
if (type == CURSEG_COLD_DATA_PINNED)
type = CURSEG_COLD_DATA;
return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
}
......
......@@ -1213,9 +1213,13 @@ static int f2fs_statfs_project(struct super_block *sb,
return PTR_ERR(dquot);
spin_lock(&dquot->dq_dqb_lock);
limit = (dquot->dq_dqb.dqb_bsoftlimit ?
dquot->dq_dqb.dqb_bsoftlimit :
dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits;
limit = 0;
if (dquot->dq_dqb.dqb_bsoftlimit)
limit = dquot->dq_dqb.dqb_bsoftlimit;
if (dquot->dq_dqb.dqb_bhardlimit &&
(!limit || dquot->dq_dqb.dqb_bhardlimit < limit))
limit = dquot->dq_dqb.dqb_bhardlimit;
if (limit && buf->f_blocks > limit) {
curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits;
buf->f_blocks = limit;
......@@ -1224,9 +1228,13 @@ static int f2fs_statfs_project(struct super_block *sb,
(buf->f_blocks - curblock) : 0;
}
limit = dquot->dq_dqb.dqb_isoftlimit ?
dquot->dq_dqb.dqb_isoftlimit :
dquot->dq_dqb.dqb_ihardlimit;
limit = 0;
if (dquot->dq_dqb.dqb_isoftlimit)
limit = dquot->dq_dqb.dqb_isoftlimit;
if (dquot->dq_dqb.dqb_ihardlimit &&
(!limit || dquot->dq_dqb.dqb_ihardlimit < limit))
limit = dquot->dq_dqb.dqb_ihardlimit;
if (limit && buf->f_files > limit) {
buf->f_files = limit;
buf->f_ffree =
......@@ -2618,6 +2626,21 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return -EFSCORRUPTED;
}
if (RDEV(0).path[0]) {
block_t dev_seg_count = le32_to_cpu(RDEV(0).total_segments);
int i = 1;
while (i < MAX_DEVICES && RDEV(i).path[0]) {
dev_seg_count += le32_to_cpu(RDEV(i).total_segments);
i++;
}
if (segment_count != dev_seg_count) {
f2fs_info(sbi, "Segment count (%u) mismatch with total segments from devices (%u)",
segment_count, dev_seg_count);
return -EFSCORRUPTED;
}
}
if (secs_per_zone > total_sections || !secs_per_zone) {
f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)",
secs_per_zone, total_sections);
......@@ -2852,6 +2875,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
spin_lock_init(&sbi->dev_lock);
init_rwsem(&sbi->sb_lock);
init_rwsem(&sbi->pin_sem);
}
static int init_percpu_info(struct f2fs_sb_info *sbi)
......@@ -2946,6 +2970,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
f2fs_err(sbi, "Unable to read %dth superblock",
block + 1);
err = -EIO;
*recovery = 1;
continue;
}
......@@ -2955,6 +2980,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock",
block + 1);
brelse(bh);
*recovery = 1;
continue;
}
......@@ -2967,10 +2993,6 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
brelse(bh);
}
/* Fail to read any one of the superblocks*/
if (err < 0)
*recovery = 1;
/* No valid superblock */
if (!*raw_super)
kvfree(super);
......@@ -3324,6 +3346,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
sbi->write_io[i][j].bio = NULL;
spin_lock_init(&sbi->write_io[i][j].io_lock);
INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
init_rwsem(&sbi->write_io[i][j].bio_list_lock);
}
}
......@@ -3735,8 +3759,13 @@ static int __init init_f2fs_fs(void)
err = f2fs_init_post_read_processing();
if (err)
goto free_root_stats;
err = f2fs_init_bio_entry_cache();
if (err)
goto free_post_read;
return 0;
free_post_read:
f2fs_destroy_post_read_processing();
free_root_stats:
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
......@@ -3760,6 +3789,7 @@ static int __init init_f2fs_fs(void)
static void __exit exit_f2fs_fs(void)
{
f2fs_destroy_bio_entry_cache();
f2fs_destroy_post_read_processing();
f2fs_destroy_root_stats();
unregister_filesystem(&f2fs_fs_type);
......
......@@ -154,6 +154,8 @@ static ssize_t features_show(struct f2fs_attr *a,
if (f2fs_sb_has_casefold(sbi))
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "casefold");
len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
len ? ", " : "", "pin_file");
len += snprintf(buf + len, PAGE_SIZE - len, "\n");
return len;
}
......@@ -443,6 +445,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, main_blkaddr, main_blkaddr);
F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks);
......@@ -510,6 +513,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_idle),
ATTR_LIST(gc_urgent),
ATTR_LIST(reclaim_segments),
ATTR_LIST(main_blkaddr),
ATTR_LIST(max_small_discards),
ATTR_LIST(discard_granularity),
ATTR_LIST(batched_trim_sections),
......
......@@ -539,8 +539,9 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
{
struct inode *inode = d_inode(dentry);
nid_t xnid = F2FS_I(inode)->i_xattr_nid;
struct f2fs_xattr_entry *entry;
void *base_addr;
void *base_addr, *last_base_addr;
int error = 0;
size_t rest = buffer_size;
......@@ -550,6 +551,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
if (error)
return error;
last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode);
list_for_each_xattr(entry, base_addr) {
const struct xattr_handler *handler =
f2fs_xattr_handler(entry->e_name_index);
......@@ -557,6 +560,15 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
size_t prefix_len;
size_t size;
if ((void *)(entry) + sizeof(__u32) > last_base_addr ||
(void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) {
f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
inode->i_ino);
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
error = -EFSCORRUPTED;
goto cleanup;
}
if (!handler || (handler->list && !handler->list(dentry)))
continue;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment