Commit 3ef96fcf authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ext4_for_linus-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Many ext4 and jbd2 cleanups and bug fixes:

   - Cleanups in the ext4 remount code when going to and from read-only

   - Cleanups in ext4's multiblock allocator

   - Cleanups in the jbd2 setup/mounting code paths

   - Performance improvements when appending to a delayed allocation file

   - Miscellaneous syzbot and other bug fixes"

* tag 'ext4_for_linus-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (60 commits)
  ext4: fix slab-use-after-free in ext4_es_insert_extent()
  libfs: remove redundant checks of s_encoding
  ext4: remove redundant checks of s_encoding
  ext4: reject casefold inode flag without casefold feature
  ext4: use LIST_HEAD() to initialize the list_head in mballoc.c
  ext4: do not mark inode dirty every time when appending using delalloc
  ext4: rename s_error_work to s_sb_upd_work
  ext4: add periodic superblock update check
  ext4: drop dio overwrite only flag and associated warning
  ext4: add correct group descriptors and reserved GDT blocks to system zone
  ext4: remove unused function declaration
  ext4: mballoc: avoid garbage value from err
  ext4: use sbi instead of EXT4_SB(sb) in ext4_mb_new_blocks_simple()
  ext4: change the type of blocksize in ext4_mb_init_cache()
  ext4: fix unttached inode after power cut with orphan file feature enabled
  jbd2: correct the end of the journal recovery scan range
  ext4: ext4_get_{dev}_journal return proper error value
  ext4: cleanup ext4_get_dev_journal() and ext4_get_journal()
  jbd2: jbd2_journal_init_{dev,inode} return proper error return value
  jbd2: drop useless error tag in jbd2_journal_wipe()
  ...
parents 659b3613 768d612f
......@@ -913,10 +913,10 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
}
/*
* This function returns the number of file system metadata clusters at
* This function returns the number of file system metadata blocks at
* the beginning of a block group, including the reserved gdt blocks.
*/
static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
ext4_group_t block_group)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
......@@ -935,8 +935,15 @@ static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
} else { /* For META_BG_BLOCK_GROUPS */
num += ext4_bg_num_gdb_meta(sb, block_group);
}
return EXT4_NUM_B2C(sbi, num);
return num;
}
/*
* Return the result of ext4_num_base_meta_blocks() for @block_group after
* passing it through EXT4_NUM_B2C().
*
* NOTE(review): EXT4_NUM_B2C() presumably converts a block count into a
* count of clusters -- confirm against the macro definition.
*/
static unsigned int ext4_num_base_meta_clusters(struct super_block *sb,
ext4_group_t block_group)
{
return EXT4_NUM_B2C(EXT4_SB(sb), ext4_num_base_meta_blocks(sb, block_group));
}
/**
* ext4_inode_to_goal_block - return a hint for block allocation
* @inode: inode for block allocation
......
......@@ -215,7 +215,6 @@ int ext4_setup_system_zone(struct super_block *sb)
struct ext4_system_blocks *system_blks;
struct ext4_group_desc *gdp;
ext4_group_t i;
int flex_size = ext4_flex_bg_size(sbi);
int ret;
system_blks = kzalloc(sizeof(*system_blks), GFP_KERNEL);
......@@ -223,12 +222,13 @@ int ext4_setup_system_zone(struct super_block *sb)
return -ENOMEM;
for (i=0; i < ngroups; i++) {
unsigned int meta_blks = ext4_num_base_meta_blocks(sb, i);
cond_resched();
if (ext4_bg_has_super(sb, i) &&
((i < 5) || ((i % flex_size) == 0))) {
if (meta_blks != 0) {
ret = add_system_zone(system_blks,
ext4_group_first_block_no(sb, i),
ext4_bg_num_gdb(sb, i) + 1, 0);
meta_blks, 0);
if (ret)
goto err;
}
......
......@@ -33,6 +33,8 @@ int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname,
#if IS_ENABLED(CONFIG_UNICODE)
err = ext4_fname_setup_ci_filename(dir, iname, fname);
if (err)
ext4_fname_free_filename(fname);
#endif
return err;
}
......@@ -51,6 +53,8 @@ int ext4_fname_prepare_lookup(struct inode *dir, struct dentry *dentry,
#if IS_ENABLED(CONFIG_UNICODE)
err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname);
if (err)
ext4_fname_free_filename(fname);
#endif
return err;
}
......
......@@ -176,9 +176,6 @@ enum criteria {
EXT4_MB_NUM_CRS
};
/* criteria below which we use fast block scanning and avoid unnecessary IO */
#define CR_FAST CR_GOAL_LEN_SLOW
/*
* Flags used in mballoc's allocation_context flags field.
*
......@@ -1241,6 +1238,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT2_MB_OPTIMIZE_SCAN 0x00000080 /* Optimize group
* scanning in mballoc
*/
#define EXT4_MOUNT2_ABORT 0x00000100 /* Abort filesystem */
#define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \
~EXT4_MOUNT_##opt
......@@ -1258,10 +1256,8 @@ struct ext4_inode_info {
#define ext4_test_and_set_bit __test_and_set_bit_le
#define ext4_set_bit __set_bit_le
#define ext4_set_bit_atomic ext2_set_bit_atomic
#define ext4_test_and_clear_bit __test_and_clear_bit_le
#define ext4_clear_bit __clear_bit_le
#define ext4_clear_bit_atomic ext2_clear_bit_atomic
#define ext4_test_bit test_bit_le
#define ext4_find_next_zero_bit find_next_zero_bit_le
#define ext4_find_next_bit find_next_bit_le
......@@ -1708,10 +1704,13 @@ struct ext4_sb_info {
const char *s_last_error_func;
time64_t s_last_error_time;
/*
* If we are in a context where we cannot update error information in
* the on-disk superblock, we queue this work to do it.
* If we are in a context where we cannot update the on-disk
* superblock, we queue the work here. This is used to update
* the error information in the superblock, and for periodic
* updates of the superblock called from the commit callback
* function.
*/
struct work_struct s_error_work;
struct work_struct s_sb_upd_work;
/* Ext4 fast commit sub transaction ID */
atomic_t s_fc_subtid;
......@@ -1804,7 +1803,6 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
*/
enum {
EXT4_MF_MNTDIR_SAMPLED,
EXT4_MF_FS_ABORTED, /* Fatal error detected */
EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */
};
......@@ -2228,9 +2226,9 @@ extern int ext4_feature_set_ok(struct super_block *sb, int readonly);
#define EXT4_FLAGS_SHUTDOWN 1
#define EXT4_FLAGS_BDEV_IS_DAX 2
static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi)
static inline int ext4_forced_shutdown(struct super_block *sb)
{
return test_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
return test_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags);
}
/*
......@@ -2708,7 +2706,6 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
extern int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
s64 nclusters, unsigned int flags);
extern ext4_fsblk_t ext4_count_free_clusters(struct super_block *);
extern void ext4_check_blocks_bitmap(struct super_block *);
extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
ext4_group_t block_group,
struct buffer_head ** bh);
......@@ -2864,7 +2861,6 @@ extern void ext4_free_inode(handle_t *, struct inode *);
extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
extern unsigned long ext4_count_free_inodes(struct super_block *);
extern unsigned long ext4_count_dirs(struct super_block *);
extern void ext4_check_inodes_bitmap(struct super_block *);
extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
extern int ext4_init_inode_table(struct super_block *sb,
ext4_group_t group, int barrier);
......@@ -2907,7 +2903,6 @@ extern int ext4_mb_init(struct super_block *);
extern int ext4_mb_release(struct super_block *);
extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
struct ext4_allocation_request *, int *);
extern int ext4_mb_reserve_blocks(struct super_block *, int);
extern void ext4_discard_preallocations(struct inode *, unsigned int);
extern int __init ext4_init_mballoc(void);
extern void ext4_exit_mballoc(void);
......@@ -2930,6 +2925,10 @@ extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
extern void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
int len, int state);
/*
* Return true when @cr is CR_GOAL_LEN_SLOW or any criterion that compares
* greater than it in enum criteria, false otherwise.
*
* NOTE(review): presumably these are the criteria for which group scanning
* may require extra IO -- confirm against the mballoc callers.
*/
static inline bool ext4_mb_cr_expensive(enum criteria cr)
{
return cr >= CR_GOAL_LEN_SLOW;
}
/* inode.c */
void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
......@@ -2983,7 +2982,6 @@ extern void ext4_evict_inode(struct inode *);
extern void ext4_clear_inode(struct inode *);
extern int ext4_file_getattr(struct mnt_idmap *, const struct path *,
struct kstat *, u32, unsigned int);
extern int ext4_sync_inode(handle_t *, struct inode *);
extern void ext4_dirty_inode(struct inode *, int);
extern int ext4_change_inode_journal_flag(struct inode *, int);
extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
......@@ -3090,6 +3088,8 @@ extern const char *ext4_decode_error(struct super_block *sb, int errno,
extern void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
ext4_group_t block_group,
unsigned int flags);
extern unsigned int ext4_num_base_meta_blocks(struct super_block *sb,
ext4_group_t block_group);
extern __printf(7, 8)
void __ext4_error(struct super_block *, const char *, unsigned int, bool,
......@@ -3531,8 +3531,6 @@ extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
/* inline.c */
extern int ext4_get_max_inline_size(struct inode *inode);
extern int ext4_find_inline_data_nolock(struct inode *inode);
extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
unsigned int len);
extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
int ext4_readpage_inline(struct inode *inode, struct folio *folio);
......
......@@ -67,11 +67,12 @@ static int ext4_journal_check_start(struct super_block *sb)
might_sleep();
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
if (unlikely(ext4_forced_shutdown(sb)))
return -EIO;
if (sb_rdonly(sb))
if (WARN_ON_ONCE(sb_rdonly(sb)))
return -EROFS;
WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
journal = EXT4_SB(sb)->s_journal;
/*
......
......@@ -878,23 +878,29 @@ void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
err1 = __es_remove_extent(inode, lblk, end, NULL, es1);
if (err1 != 0)
goto error;
/* Free preallocated extent if it didn't get used. */
if (es1) {
if (!es1->es_len)
__es_free_extent(es1);
es1 = NULL;
}
err2 = __es_insert_extent(inode, &newes, es2);
if (err2 == -ENOMEM && !ext4_es_must_keep(&newes))
err2 = 0;
if (err2 != 0)
goto error;
/* Free preallocated extent if it didn't get used. */
if (es2) {
if (!es2->es_len)
__es_free_extent(es2);
es2 = NULL;
}
if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&
(status & EXTENT_STATUS_WRITTEN ||
status & EXTENT_STATUS_UNWRITTEN))
__revise_pending(inode, lblk, len);
/* es is pre-allocated but not used, free it. */
if (es1 && !es1->es_len)
__es_free_extent(es1);
if (es2 && !es2->es_len)
__es_free_extent(es2);
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
if (err1 || err2)
......@@ -1491,8 +1497,12 @@ void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
*/
write_lock(&EXT4_I(inode)->i_es_lock);
err = __es_remove_extent(inode, lblk, end, &reserved, es);
if (es && !es->es_len)
/* Free preallocated extent if it didn't get used. */
if (es) {
if (!es->es_len)
__es_free_extent(es);
es = NULL;
}
write_unlock(&EXT4_I(inode)->i_es_lock);
if (err)
goto retry;
......@@ -2047,19 +2057,25 @@ void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,
err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1);
if (err1 != 0)
goto error;
/* Free preallocated extent if it didn't get used. */
if (es1) {
if (!es1->es_len)
__es_free_extent(es1);
es1 = NULL;
}
err2 = __es_insert_extent(inode, &newes, es2);
if (err2 != 0)
goto error;
/* Free preallocated extent if it didn't get used. */
if (es2) {
if (!es2->es_len)
__es_free_extent(es2);
es2 = NULL;
}
if (allocated)
__insert_pending(inode, lblk);
/* es is pre-allocated but not used, free it. */
if (es1 && !es1->es_len)
__es_free_extent(es1);
if (es2 && !es2->es_len)
__es_free_extent(es2);
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
if (err1 || err2)
......
......@@ -131,7 +131,7 @@ static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
if (!iov_iter_count(to))
......@@ -153,7 +153,7 @@ static ssize_t ext4_file_splice_read(struct file *in, loff_t *ppos,
{
struct inode *inode = file_inode(in);
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
return filemap_splice_read(in, ppos, pipe, len, flags);
}
......@@ -476,6 +476,11 @@ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
* required to change security info in file_modified(), for extending
* I/O, any form of non-overwrite I/O, and unaligned I/O to unwritten
* extents (as partial block zeroing may be required).
*
* Note that unaligned writes are allowed under shared lock so long as
* they are pure overwrites. Otherwise, concurrent unaligned writes risk
* data corruption due to partial block zeroing in the dio layer, and so
* the I/O must occur exclusively.
*/
if (*ilock_shared &&
((!IS_NOSEC(inode) || *extend || !overwrite ||
......@@ -492,21 +497,12 @@ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
/*
* Now that locking is settled, determine dio flags and exclusivity
* requirements. Unaligned writes are allowed under shared lock so long
* as they are pure overwrites. Set the iomap overwrite only flag as an
* added precaution in this case. Even though this is unnecessary, we
* can detect and warn on unexpected -EAGAIN if an unsafe unaligned
* write is ever submitted.
*
* Otherwise, concurrent unaligned writes risk data corruption due to
* partial block zeroing in the dio layer, and so the I/O must occur
* exclusively. The inode lock is already held exclusive if the write is
* non-overwrite or extending, so drain all outstanding dio and set the
* force wait dio flag.
* requirements. We don't use DIO_OVERWRITE_ONLY because we enforce
* behavior already. The inode lock is already held exclusive if the
* write is non-overwrite or extending, so drain all outstanding dio and
* set the force wait dio flag.
*/
if (*ilock_shared && unaligned_io) {
*dio_flags = IOMAP_DIO_OVERWRITE_ONLY;
} else if (!*ilock_shared && (unaligned_io || *extend)) {
if (!*ilock_shared && (unaligned_io || *extend)) {
if (iocb->ki_flags & IOCB_NOWAIT) {
ret = -EAGAIN;
goto out;
......@@ -608,7 +604,6 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
iomap_ops = &ext4_iomap_overwrite_ops;
ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
dio_flags, NULL, 0);
WARN_ON_ONCE(ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT));
if (ret == -ENOTBLK)
ret = 0;
......@@ -709,7 +704,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
#ifdef CONFIG_FS_DAX
......@@ -806,10 +801,9 @@ static const struct vm_operations_struct ext4_file_vm_ops = {
static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file->f_mapping->host;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct dax_device *dax_dev = sbi->s_daxdev;
struct dax_device *dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
if (unlikely(ext4_forced_shutdown(sbi)))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
/*
......@@ -885,7 +879,7 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
{
int ret;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt);
......
......@@ -131,9 +131,8 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
int ret = 0, err;
bool needs_barrier = false;
struct inode *inode = file->f_mapping->host;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
if (unlikely(ext4_forced_shutdown(sbi)))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
ASSERT(ext4_journal_current_handle() == NULL);
......@@ -141,14 +140,14 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trace_ext4_sync_file_enter(file, datasync);
if (sb_rdonly(inode->i_sb)) {
/* Make sure that we read updated s_mount_flags value */
/* Make sure that we read updated s_ext4_flags value */
smp_rmb();
if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))
if (ext4_forced_shutdown(inode->i_sb))
ret = -EROFS;
goto out;
}
if (!sbi->s_journal) {
if (!EXT4_SB(inode->i_sb)->s_journal) {
ret = ext4_fsync_nojournal(file, start, end, datasync,
&needs_barrier);
if (needs_barrier)
......
......@@ -300,7 +300,7 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
unsigned char *buff;
struct qstr qstr = {.name = name, .len = len };
if (len && IS_CASEFOLDED(dir) && um &&
if (len && IS_CASEFOLDED(dir) &&
(!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir))) {
buff = kzalloc(sizeof(char) * PATH_MAX, GFP_KERNEL);
if (!buff)
......
......@@ -950,7 +950,7 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
sb = dir->i_sb;
sbi = EXT4_SB(sb);
if (unlikely(ext4_forced_shutdown(sbi)))
if (unlikely(ext4_forced_shutdown(sb)))
return ERR_PTR(-EIO);
ngroups = ext4_get_groups_count(sb);
......@@ -1523,12 +1523,6 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
int num, ret = 0, used_blks = 0;
unsigned long used_inos = 0;
/* This should not happen, but just to be sure check this */
if (sb_rdonly(sb)) {
ret = 1;
goto out;
}
gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
if (!gdp || !grp)
goto out;
......
......@@ -228,7 +228,7 @@ static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
struct ext4_inode *raw_inode;
int cp_len = 0;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return;
BUG_ON(!EXT4_I(inode)->i_inline_off);
......
......@@ -1114,7 +1114,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index;
unsigned from, to;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
trace_ext4_write_begin(inode, pos, len);
......@@ -2213,8 +2213,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
if (err < 0) {
struct super_block *sb = inode->i_sb;
if (ext4_forced_shutdown(EXT4_SB(sb)) ||
ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
if (ext4_forced_shutdown(sb))
goto invalidate_dirty_pages;
/*
* Let the upper layers retry transient errors.
......@@ -2534,14 +2533,13 @@ static int ext4_do_writepages(struct mpage_da_data *mpd)
* If the filesystem has aborted, it is read-only, so return
* right away instead of dumping stack traces later on that
* will obscure the real source of the problem. We test
* EXT4_MF_FS_ABORTED instead of sb->s_flag's SB_RDONLY because
* fs shutdown state instead of sb->s_flag's SB_RDONLY because
* the latter could be true if the filesystem is mounted
* read-only, and in that case, ext4_writepages should
* *never* be called, so if that ever happens, we would want
* the stack trace.
*/
if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) ||
ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))) {
if (unlikely(ext4_forced_shutdown(mapping->host->i_sb))) {
ret = -EROFS;
goto out_writepages;
}
......@@ -2759,7 +2757,7 @@ static int ext4_writepages(struct address_space *mapping,
int ret;
int alloc_ctx;
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
if (unlikely(ext4_forced_shutdown(sb)))
return -EIO;
alloc_ctx = ext4_writepages_down_read(sb);
......@@ -2798,16 +2796,16 @@ static int ext4_dax_writepages(struct address_space *mapping,
int ret;
long nr_to_write = wbc->nr_to_write;
struct inode *inode = mapping->host;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
int alloc_ctx;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
alloc_ctx = ext4_writepages_down_read(inode->i_sb);
trace_ext4_writepages(inode, wbc);
ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
ret = dax_writeback_mapping_range(mapping,
EXT4_SB(inode->i_sb)->s_daxdev, wbc);
trace_ext4_writepages_result(inode, wbc, ret,
nr_to_write - wbc->nr_to_write);
ext4_writepages_up_read(inode->i_sb, alloc_ctx);
......@@ -2857,7 +2855,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
pgoff_t index;
struct inode *inode = mapping->host;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
index = pos >> PAGE_SHIFT;
......@@ -2937,14 +2935,73 @@ static int ext4_da_should_update_i_disksize(struct folio *folio,
return 1;
}
/*
* Finish a delayed-allocation write on @page.
*
* Completes the write through block_write_end(), raises i_size when the
* write ends beyond it, updates i_disksize when
* ext4_da_should_update_i_disksize() asks for it, then unlocks and puts
* the page. The inode is marked dirty under a journal handle only when
* i_disksize was changed.
*
* Returns the byte count reported by block_write_end(), or the error
* carried by the handle pointer when ext4_journal_start() fails.
*/
static int ext4_da_do_write_end(struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page)
{
struct inode *inode = mapping->host;
loff_t old_size = inode->i_size;
bool disksize_changed = false;
loff_t new_i_size;
/*
* block_write_end() will mark the inode as dirty with the I_DIRTY_PAGES
* flag, which is all that's needed to trigger page writeback.
*/
copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL);
new_i_size = pos + copied;
/*
* It's important to update i_size while still holding page lock,
* because page writeout could otherwise come in and zero beyond
* i_size.
*
* Since we are holding inode lock, we are sure i_disksize <=
* i_size. We also know that if i_disksize < i_size, there are
* delalloc writes pending in the range up to i_size. If the end of
* the current write is <= i_size, there's no need to touch
* i_disksize since writeback will push i_disksize up to i_size
* eventually. If the end of the current write is > i_size and
* inside an allocated block which ext4_da_should_update_i_disksize()
* checked, we need to update i_disksize here as certain
* ext4_writepages() paths not allocating blocks and update i_disksize.
*/
if (new_i_size > inode->i_size) {
unsigned long end;
i_size_write(inode, new_i_size);
/* Offset of the last written byte within its PAGE_SIZE-sized page. */
end = (new_i_size - 1) & (PAGE_SIZE - 1);
if (copied && ext4_da_should_update_i_disksize(page_folio(page), end)) {
ext4_update_i_disksize(inode, new_i_size);
disksize_changed = true;
}
}
/* Done with the page: unlock it and put it before any journal work. */
unlock_page(page);
put_page(page);
/* The write started past the old i_size; report the [old_size, pos) gap. */
if (old_size < pos)
pagecache_isize_extended(inode, old_size, pos);
/* Start a handle and dirty the inode only if i_disksize was updated. */
if (disksize_changed) {
handle_t *handle;
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle))
return PTR_ERR(handle);
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
}
return copied;
}
static int ext4_da_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
struct inode *inode = mapping->host;
loff_t new_i_size;
unsigned long start, end;
int write_mode = (int)(unsigned long)fsdata;
struct folio *folio = page_folio(page);
......@@ -2963,30 +3020,7 @@ static int ext4_da_write_end(struct file *file,
if (unlikely(copied < len) && !PageUptodate(page))
copied = 0;
start = pos & (PAGE_SIZE - 1);
end = start + copied - 1;
/*
* Since we are holding inode lock, we are sure i_disksize <=
* i_size. We also know that if i_disksize < i_size, there are
* delalloc writes pending in the range upto i_size. If the end of
* the current write is <= i_size, there's no need to touch
* i_disksize since writeback will push i_disksize upto i_size
* eventually. If the end of the current write is > i_size and
* inside an allocated block (ext4_da_should_update_i_disksize()
* check), we need to update i_disksize here as certain
* ext4_writepages() paths not allocating blocks update i_disksize.
*
* Note that we defer inode dirtying to generic_write_end() /
* ext4_da_write_inline_data_end().
*/
new_i_size = pos + copied;
if (copied && new_i_size > inode->i_size &&
ext4_da_should_update_i_disksize(folio, end))
ext4_update_i_disksize(inode, new_i_size);
return generic_write_end(file, mapping, pos, len, copied, &folio->page,
fsdata);
return ext4_da_do_write_end(mapping, pos, len, copied, &folio->page);
}
/*
......@@ -4940,9 +4974,12 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
"iget: bogus i_mode (%o)", inode->i_mode);
goto bad_inode;
}
if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb))
if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) {
ext4_error_inode(inode, function, line, 0,
"casefold flag without casefold feature");
ret = -EFSCORRUPTED;
goto bad_inode;
}
if ((err_str = check_igot_inode(inode, flags)) != NULL) {
ext4_error_inode(inode, function, line, 0, err_str);
ret = -EFSCORRUPTED;
......@@ -5131,11 +5168,10 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
{
int err;
if (WARN_ON_ONCE(current->flags & PF_MEMALLOC) ||
sb_rdonly(inode->i_sb))
if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
return 0;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
if (EXT4_SB(inode->i_sb)->s_journal) {
......@@ -5255,7 +5291,7 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
const unsigned int ia_valid = attr->ia_valid;
bool inc_ivers = true;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
if (unlikely(IS_IMMUTABLE(inode)))
......@@ -5674,7 +5710,7 @@ int ext4_mark_iloc_dirty(handle_t *handle,
{
int err = 0;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
if (unlikely(ext4_forced_shutdown(inode->i_sb))) {
put_bh(iloc->bh);
return -EIO;
}
......@@ -5700,7 +5736,7 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
{
int err;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
err = ext4_get_inode_loc(inode, iloc);
......
......@@ -802,7 +802,7 @@ int ext4_force_shutdown(struct super_block *sb, u32 flags)
if (flags > EXT4_GOING_FLAGS_NOLOGFLUSH)
return -EINVAL;
if (ext4_forced_shutdown(sbi))
if (ext4_forced_shutdown(sb))
return 0;
ext4_msg(sb, KERN_ALERT, "shut down requested (%d)", flags);
......
This diff is collapsed.
......@@ -233,6 +233,20 @@ static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
(fex->fe_start << EXT4_SB(sb)->s_cluster_bits);
}
/*
* Return the logical end of the free extent @fex: fex->fe_logical plus
* fex->fe_len passed through EXT4_C2B().
*
* NOTE(review): EXT4_C2B() presumably converts a cluster count into
* blocks -- confirm against the macro definition.
*/
static inline loff_t extent_logical_end(struct ext4_sb_info *sbi,
struct ext4_free_extent *fex)
{
/* Use loff_t to avoid end exceeding ext4_lblk_t max. */
return (loff_t)fex->fe_logical + EXT4_C2B(sbi, fex->fe_len);
}
/*
* Return the logical end of the preallocation @pa: pa->pa_lstart plus
* pa->pa_len passed through EXT4_C2B().
*
* NOTE(review): EXT4_C2B() presumably converts a cluster count into
* blocks -- confirm against the macro definition.
*/
static inline loff_t pa_logical_end(struct ext4_sb_info *sbi,
struct ext4_prealloc_space *pa)
{
/* Use loff_t to avoid end exceeding ext4_lblk_t max. */
return (loff_t)pa->pa_lstart + EXT4_C2B(sbi, pa->pa_len);
}
typedef int (*ext4_mballoc_query_range_fn)(
struct super_block *sb,
ext4_group_t agno,
......
......@@ -162,7 +162,7 @@ static int kmmpd(void *data)
memcpy(mmp->mmp_nodename, init_utsname()->nodename,
sizeof(mmp->mmp_nodename));
while (!kthread_should_stop() && !sb_rdonly(sb)) {
while (!kthread_should_stop() && !ext4_forced_shutdown(sb)) {
if (!ext4_has_feature_mmp(sb)) {
ext4_warning(sb, "kmmpd being stopped since MMP feature"
" has been disabled.");
......
......@@ -1445,7 +1445,7 @@ int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
struct dx_hash_info *hinfo = &name->hinfo;
int len;
if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding ||
if (!IS_CASEFOLDED(dir) ||
(IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir))) {
cf_name->name = NULL;
return 0;
......@@ -1496,7 +1496,7 @@ static bool ext4_match(struct inode *parent,
#endif
#if IS_ENABLED(CONFIG_UNICODE)
if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent) &&
if (IS_CASEFOLDED(parent) &&
(!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) {
if (fname->cf_name.name) {
struct qstr cf = {.name = fname->cf_name.name,
......@@ -2393,7 +2393,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
#if IS_ENABLED(CONFIG_UNICODE)
if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) &&
sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name))
utf8_validate(sb->s_encoding, &dentry->d_name))
return -EINVAL;
#endif
......@@ -2799,6 +2799,7 @@ static int ext4_add_nondir(handle_t *handle,
return err;
}
drop_nlink(inode);
ext4_mark_inode_dirty(handle, inode);
ext4_orphan_add(handle, inode);
unlock_new_inode(inode);
return err;
......@@ -3142,7 +3143,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
struct ext4_dir_entry_2 *de;
handle_t *handle = NULL;
if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
if (unlikely(ext4_forced_shutdown(dir->i_sb)))
return -EIO;
/* Initialize quotas before so that eventual writes go in
......@@ -3302,7 +3303,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
{
int retval;
if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
if (unlikely(ext4_forced_shutdown(dir->i_sb)))
return -EIO;
trace_ext4_unlink_enter(dir, dentry);
......@@ -3370,7 +3371,7 @@ static int ext4_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct fscrypt_str disk_link;
int retries = 0;
if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
if (unlikely(ext4_forced_shutdown(dir->i_sb)))
return -EIO;
err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
......@@ -3437,6 +3438,7 @@ static int ext4_symlink(struct mnt_idmap *idmap, struct inode *dir,
err_drop_inode:
clear_nlink(inode);
ext4_mark_inode_dirty(handle, inode);
ext4_orphan_add(handle, inode);
unlock_new_inode(inode);
if (handle)
......@@ -4021,6 +4023,7 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
ext4_resetent(handle, &old,
old.inode->i_ino, old_file_type);
drop_nlink(whiteout);
ext4_mark_inode_dirty(handle, whiteout);
ext4_orphan_add(handle, whiteout);
}
unlock_new_inode(whiteout);
......@@ -4187,7 +4190,7 @@ static int ext4_rename2(struct mnt_idmap *idmap,
{
int err;
if (unlikely(ext4_forced_shutdown(EXT4_SB(old_dir->i_sb))))
if (unlikely(ext4_forced_shutdown(old_dir->i_sb)))
return -EIO;
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
......
......@@ -184,7 +184,7 @@ static int ext4_end_io_end(ext4_io_end_t *io_end)
io_end->handle = NULL; /* Following call will use up the handle */
ret = ext4_convert_unwritten_io_end_vec(handle, io_end);
if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) {
if (ret < 0 && !ext4_forced_shutdown(inode->i_sb)) {
ext4_msg(inode->i_sb, KERN_EMERG,
"failed to convert unwritten extents to written "
"extents -- potential data loss! "
......
This diff is collapsed.
......@@ -701,7 +701,7 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
{
int error;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
if (unlikely(ext4_forced_shutdown(inode->i_sb)))
return -EIO;
if (strlen(name) > 255)
......
......@@ -40,18 +40,6 @@ static inline void __buffer_unlink(struct journal_head *jh)
}
}
/*
* Check whether a checkpoint buffer is still busy, i.e. cannot be released.
*
* Returns true when @jh still has b_transaction set, or when its buffer
* head is locked or dirty; returns false otherwise.
*
* Requires j_list_lock
*/
static inline bool __cp_buffer_busy(struct journal_head *jh)
{
struct buffer_head *bh = jh2bh(jh);
return (jh->b_transaction || buffer_locked(bh) || buffer_dirty(bh));
}
/*
* __jbd2_log_wait_for_space: wait until there is space in the journal.
*
......@@ -349,6 +337,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
/* Checkpoint list management */
enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP};
/*
* journal_shrink_one_cp_list
*
......@@ -360,7 +350,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
* Called with j_list_lock held.
*/
static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
bool destroy, bool *released)
enum shrink_type type,
bool *released)
{
struct journal_head *last_jh;
struct journal_head *next_jh = jh;
......@@ -376,12 +367,15 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
jh = next_jh;
next_jh = jh->b_cpnext;
if (destroy) {
if (type == SHRINK_DESTROY) {
ret = __jbd2_journal_remove_checkpoint(jh);
} else {
ret = jbd2_journal_try_remove_checkpoint(jh);
if (ret < 0)
if (ret < 0) {
if (type == SHRINK_BUSY_SKIP)
continue;
break;
}
}
nr_freed++;
......@@ -445,7 +439,7 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
tid = transaction->t_tid;
freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
false, &released);
SHRINK_BUSY_SKIP, &released);
nr_freed += freed;
(*nr_to_scan) -= min(*nr_to_scan, freed);
if (*nr_to_scan == 0)
......@@ -485,19 +479,21 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
{
transaction_t *transaction, *last_transaction, *next_transaction;
enum shrink_type type;
bool released;
transaction = journal->j_checkpoint_transactions;
if (!transaction)
return;
type = destroy ? SHRINK_DESTROY : SHRINK_BUSY_STOP;
last_transaction = transaction->t_cpprev;
next_transaction = transaction;
do {
transaction = next_transaction;
next_transaction = transaction->t_cpnext;
journal_shrink_one_cp_list(transaction->t_checkpoint_list,
destroy, &released);
type, &released);
/*
* This function only frees up some memory if possible so we
* don't have an obligation to finish processing. Bail out if
......@@ -631,6 +627,8 @@ int jbd2_journal_try_remove_checkpoint(struct journal_head *jh)
{
struct buffer_head *bh = jh2bh(jh);
if (jh->b_transaction)
return -EBUSY;
if (!trylock_buffer(bh))
return -EBUSY;
if (buffer_dirty(bh)) {
......
This diff is collapsed.
......@@ -230,12 +230,8 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
/* Make sure we wrap around the log correctly! */
#define wrap(journal, var) \
do { \
unsigned long _wrap_last = \
jbd2_has_feature_fast_commit(journal) ? \
(journal)->j_fc_last : (journal)->j_last; \
\
if (var >= _wrap_last) \
var -= (_wrap_last - (journal)->j_first); \
if (var >= (journal)->j_last) \
var -= ((journal)->j_last - (journal)->j_first); \
} while (0)
static int fc_do_one_pass(journal_t *journal,
......@@ -524,9 +520,7 @@ static int do_one_pass(journal_t *journal,
break;
jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
next_commit_ID, next_log_block,
jbd2_has_feature_fast_commit(journal) ?
journal->j_fc_last : journal->j_last);
next_commit_ID, next_log_block, journal->j_last);
/* Skip over each chunk of the transaction looking for
* either the next descriptor block or the final commit
......
......@@ -1648,16 +1648,6 @@ bool is_empty_dir_inode(struct inode *inode)
}
#if IS_ENABLED(CONFIG_UNICODE)
/*
* Determine if the name of a dentry should be casefolded.
*
* @dir: directory inode whose entries are being looked up or hashed
*
* Return: true if IS_CASEFOLDED(@dir) holds and the directory's superblock
* has a non-NULL s_encoding; false otherwise.
*/
static bool needs_casefold(const struct inode *dir)
{
return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
}
/**
* generic_ci_d_compare - generic d_compare implementation for casefolding filesystems
* @dentry: dentry whose name we are checking against
......@@ -1678,7 +1668,7 @@ static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
char strbuf[DNAME_INLINE_LEN];
int ret;
if (!dir || !needs_casefold(dir))
if (!dir || !IS_CASEFOLDED(dir))
goto fallback;
/*
* If the dentry name is stored in-line, then it may be concurrently
......@@ -1720,7 +1710,7 @@ static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
const struct unicode_map *um = sb->s_encoding;
int ret = 0;
if (!dir || !needs_casefold(dir))
if (!dir || !IS_CASEFOLDED(dir))
return 0;
ret = utf8_casefold_hash(um, dentry, str);
......
......@@ -908,9 +908,9 @@ int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
/* call the kernels journal init function now */
j_journal = jbd2_journal_init_inode(inode);
if (j_journal == NULL) {
if (IS_ERR(j_journal)) {
mlog(ML_ERROR, "Linux journal layer error\n");
status = -EINVAL;
status = PTR_ERR(j_journal);
goto done;
}
......@@ -1684,9 +1684,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
}
journal = jbd2_journal_init_inode(inode);
if (journal == NULL) {
if (IS_ERR(journal)) {
mlog(ML_ERROR, "Linux journal layer error\n");
status = -EIO;
status = PTR_ERR(journal);
goto done;
}
......
......@@ -630,11 +630,6 @@ struct transaction_s
*/
struct list_head t_inode_list;
/*
* Protects info related to handles
*/
spinlock_t t_handle_lock;
/*
* Longest time some handle had to wait for running transaction
*/
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment