Commit ae67d9a8 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "New features for 3.12:

   - Added aggressive extent caching using the extent status tree.  This
     can actually decrease memory usage in read-mostly workloads since
     the information is much more compactly stored in the extent status
     tree than if we had to keep the extent tree metadata blocks in the
     buffer cache.  This also improves Asynchronous I/O since it is it
     makes much less likely that we need to do metadata I/O to lookup
     the extent tree information.

   - Improve the recovery after corrupted allocation bitmaps are found
     when running in errors=ignore mode.

  Also fixed some writeback vs truncate races when using a blocksize
  less than the page size"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (25 commits)
  ext4: allow specifying external journal by pathname mount option
  ext4: mark group corrupt on group descriptor checksum
  ext4: mark block group as corrupt on inode bitmap error
  ext4: mark block group as corrupt on block bitmap error
  ext4: fix type declaration of ext4_validate_block_bitmap
  ext4: error out if verifying the block bitmap fails
  jbd2: Fix endian mixing problems in the checksumming code
  ext4: isolate ext4_extents.h file
  ext4: Fix misspellings using 'codespell' tool
  ext4: convert write_begin methods to stable_page_writes semantics
  ext4: fix use of potentially uninitialized variables in debugging code
  ext4: fix lost truncate due to race with writeback
  ext4: simplify truncation code in ext4_setattr()
  ext4: fix ext4_writepages() in presence of truncate
  ext4: move test whether extent to map can be extended to one place
  ext4: fix warning in ext4_da_update_reserve_space()
  quota: provide interface for readding allocated space into reserved space
  ext4: avoid reusing recently deleted inodes in no journal mode
  ext4: allocate delayed allocation blocks before rename
  ext4: start handle at least possible moment when renaming files
  ...
parents 71c7356f ad4eec61
......@@ -144,11 +144,12 @@ journal_async_commit Commit block can be written to disk without waiting
mount the device. This will enable 'journal_checksum'
internally.
journal_path=path
journal_dev=devnum When the external journal device's major/minor numbers
have changed, this option allows the user to specify
have changed, these options allow the user to specify
the new journal location. The journal device is
identified through its new major/minor numbers encoded
in devnum.
identified through either its new major/minor numbers
encoded in devnum, or via a path to the device.
norecovery Don't load the journal on mounting. Note that
noload if the filesystem was not unmounted cleanly,
......
......@@ -41,7 +41,7 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
/**
* Check if the given dir-inode refers to an htree-indexed directory
* (or a directory which chould potentially get coverted to use htree
* (or a directory which could potentially get converted to use htree
* indexing).
*
* Return 1 if it is a dx dir, 0 if not
......
......@@ -184,6 +184,7 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t start, tmp;
int flex_bg = 0;
struct ext4_group_info *grp;
J_ASSERT_BH(bh, buffer_locked(bh));
......@@ -191,11 +192,9 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
* essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
ext4_error(sb, "Checksum bad for group %u", block_group);
ext4_free_group_clusters_set(sb, gdp, 0);
ext4_free_inodes_set(sb, gdp, 0);
ext4_itable_unused_set(sb, gdp, 0);
memset(bh->b_data, 0xff, sb->s_blocksize);
ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
grp = ext4_get_group_info(sb, block_group);
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
return;
}
memset(bh->b_data, 0, sb->s_blocksize);
......@@ -305,7 +304,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
*/
static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
struct ext4_group_desc *desc,
unsigned int block_group,
ext4_group_t block_group,
struct buffer_head *bh)
{
ext4_grpblk_t offset;
......@@ -352,10 +351,11 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
void ext4_validate_block_bitmap(struct super_block *sb,
struct ext4_group_desc *desc,
unsigned int block_group,
ext4_group_t block_group,
struct buffer_head *bh)
{
ext4_fsblk_t blk;
struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
if (buffer_verified(bh))
return;
......@@ -366,12 +366,14 @@ void ext4_validate_block_bitmap(struct super_block *sb,
ext4_unlock_group(sb, block_group);
ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
block_group, blk);
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
return;
}
if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
desc, bh))) {
ext4_unlock_group(sb, block_group);
ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
return;
}
set_buffer_verified(bh);
......@@ -445,7 +447,10 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
return bh;
verify:
ext4_validate_block_bitmap(sb, desc, block_group, bh);
return bh;
if (buffer_verified(bh))
return bh;
put_bh(bh);
return NULL;
}
/* Returns 0 on success, 1 on error */
......@@ -469,7 +474,8 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
clear_buffer_new(bh);
/* Panic or remount fs read-only if block bitmap is invalid */
ext4_validate_block_bitmap(sb, desc, block_group, bh);
return 0;
/* ...but check for error just in case errors=continue. */
return !buffer_verified(bh);
}
struct buffer_head *
......
......@@ -33,7 +33,7 @@ static int ext4_dx_readdir(struct file *, struct dir_context *);
/**
* Check if the given dir-inode refers to an htree-indexed directory
* (or a directory which chould potentially get coverted to use htree
* (or a directory which could potentially get converted to use htree
* indexing).
*
* Return 1 if it is a dx dir, 0 if not
......
......@@ -560,6 +560,18 @@ enum {
/* Do not put hole in extent cache */
#define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200
/*
* The bit position of these flags must not overlap with any of the
* EXT4_GET_BLOCKS_*. They are used by ext4_ext_find_extent(),
* read_extent_tree_block(), ext4_split_extent_at(),
* ext4_ext_insert_extent(), and ext4_ext_create_new_leaf().
* EXT4_EX_NOCACHE is used to indicate that the we shouldn't be
* caching the extents when reading from the extent tree while a
* truncate or punch hole operation is in progress.
*/
#define EXT4_EX_NOCACHE 0x0400
#define EXT4_EX_FORCE_CACHE 0x0800
/*
* Flags used by ext4_free_blocks
*/
......@@ -569,6 +581,7 @@ enum {
#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
#define EXT4_FREE_BLOCKS_RESERVE 0x0040
/*
* ioctl commands
......@@ -590,6 +603,7 @@ enum {
#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
#define EXT4_IOC_SWAP_BOOT _IO('f', 17)
#define EXT4_IOC_PRECACHE_EXTENTS _IO('f', 18)
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
/*
......@@ -1375,6 +1389,7 @@ enum {
nolocking */
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
EXT4_STATE_ORDERED_MODE, /* data=ordered mode */
EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
};
#define EXT4_INODE_BIT_FNS(name, field, offset) \
......@@ -1915,7 +1930,7 @@ extern ext4_group_t ext4_get_group_number(struct super_block *sb,
extern void ext4_validate_block_bitmap(struct super_block *sb,
struct ext4_group_desc *desc,
unsigned int block_group,
ext4_group_t block_group,
struct buffer_head *bh);
extern unsigned int ext4_block_group(struct super_block *sb,
ext4_fsblk_t blocknr);
......@@ -2417,16 +2432,32 @@ do { \
#define EXT4_FREECLUSTERS_WATERMARK 0
#endif
/* Update i_disksize. Requires i_mutex to avoid races with truncate */
static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
{
/*
* XXX: replace with spinlock if seen contended -bzzz
*/
WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
!mutex_is_locked(&inode->i_mutex));
down_write(&EXT4_I(inode)->i_data_sem);
if (newsize > EXT4_I(inode)->i_disksize)
EXT4_I(inode)->i_disksize = newsize;
up_write(&EXT4_I(inode)->i_data_sem);
return ;
}
/*
* Update i_disksize after writeback has been started. Races with truncate
* are avoided by checking i_size under i_data_sem.
*/
static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
{
loff_t i_size;
down_write(&EXT4_I(inode)->i_data_sem);
i_size = i_size_read(inode);
if (newsize > i_size)
newsize = i_size;
if (newsize > EXT4_I(inode)->i_disksize)
EXT4_I(inode)->i_disksize = newsize;
up_write(&EXT4_I(inode)->i_data_sem);
}
struct ext4_group_info {
......@@ -2449,9 +2480,15 @@ struct ext4_group_info {
#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
#define EXT4_GROUP_INFO_WAS_TRIMMED_BIT 1
#define EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT 2
#define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT 3
#define EXT4_MB_GRP_NEED_INIT(grp) \
(test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
#define EXT4_MB_GRP_BBITMAP_CORRUPT(grp) \
(test_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &((grp)->bb_state)))
#define EXT4_MB_GRP_IBITMAP_CORRUPT(grp) \
(test_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &((grp)->bb_state)))
#define EXT4_MB_GRP_WAS_TRIMMED(grp) \
(test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
......@@ -2655,6 +2692,12 @@ extern int ext4_check_blockref(const char *, unsigned int,
struct ext4_ext_path;
struct ext4_extent;
/*
* Maximum number of logical blocks in a file; ext4_extent's ee_block is
* __le32.
*/
#define EXT_MAX_BLOCKS 0xffffffff
extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents);
......@@ -2684,7 +2727,8 @@ extern int ext4_ext_insert_extent(handle_t *, struct inode *,
struct ext4_ext_path *,
struct ext4_extent *, int);
extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
struct ext4_ext_path *);
struct ext4_ext_path *,
int flags);
extern void ext4_ext_drop_refs(struct ext4_ext_path *);
extern int ext4_ext_check_inode(struct inode *inode);
extern int ext4_find_delalloc_range(struct inode *inode,
......@@ -2693,7 +2737,7 @@ extern int ext4_find_delalloc_range(struct inode *inode,
extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len);
extern int ext4_ext_precache(struct inode *inode);
/* move_extent.c */
extern void ext4_double_down_write_data_sem(struct inode *first,
......
......@@ -133,12 +133,6 @@ struct ext4_ext_path {
* structure for external API
*/
/*
* Maximum number of logical blocks in a file; ext4_extent's ee_block is
* __le32.
*/
#define EXT_MAX_BLOCKS 0xffffffff
/*
* EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
* initialized extent. This is 2^15 and not (2^16 - 1), since we use the
......
......@@ -197,7 +197,7 @@ static inline void ext4_journal_callback_add(handle_t *handle,
* ext4_journal_callback_del: delete a registered callback
* @handle: active journal transaction handle on which callback was registered
* @jce: registered journal callback entry to unregister
* Return true if object was sucessfully removed
* Return true if object was successfully removed
*/
static inline bool ext4_journal_callback_try_del(handle_t *handle,
struct ext4_journal_cb_entry *jce)
......
This diff is collapsed.
......@@ -13,7 +13,6 @@
#include <linux/list_sort.h>
#include "ext4.h"
#include "extents_status.h"
#include "ext4_extents.h"
#include <trace/events/ext4.h>
......@@ -263,7 +262,7 @@ void ext4_es_find_delayed_extent_range(struct inode *inode,
if (tree->cache_es) {
es1 = tree->cache_es;
if (in_range(lblk, es1->es_lblk, es1->es_len)) {
es_debug("%u cached by [%u/%u) %llu %llx\n",
es_debug("%u cached by [%u/%u) %llu %x\n",
lblk, es1->es_lblk, es1->es_len,
ext4_es_pblock(es1), ext4_es_status(es1));
goto out;
......@@ -409,6 +408,8 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
}
#ifdef ES_AGGRESSIVE_TEST
#include "ext4_extents.h" /* Needed when ES_AGGRESSIVE_TEST is defined */
static void ext4_es_insert_extent_ext_check(struct inode *inode,
struct extent_status *es)
{
......@@ -419,7 +420,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
unsigned short ee_len;
int depth, ee_status, es_status;
path = ext4_ext_find_extent(inode, es->es_lblk, NULL);
path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
if (IS_ERR(path))
return;
......@@ -641,13 +642,13 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
*/
int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, ext4_fsblk_t pblk,
unsigned long long status)
unsigned int status)
{
struct extent_status newes;
ext4_lblk_t end = lblk + len - 1;
int err = 0;
es_debug("add [%u/%u) %llu %llx to extent status tree of inode %lu\n",
es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
lblk, len, pblk, status, inode->i_ino);
if (!len)
......@@ -683,6 +684,38 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
return err;
}
/*
* ext4_es_cache_extent() inserts information into the extent status
* tree if and only if there isn't information about the range in
* question already.
*/
void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, ext4_fsblk_t pblk,
unsigned int status)
{
struct extent_status *es;
struct extent_status newes;
ext4_lblk_t end = lblk + len - 1;
newes.es_lblk = lblk;
newes.es_len = len;
ext4_es_store_pblock(&newes, pblk);
ext4_es_store_status(&newes, status);
trace_ext4_es_cache_extent(inode, &newes);
if (!len)
return;
BUG_ON(end < lblk);
write_lock(&EXT4_I(inode)->i_es_lock);
es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);
if (!es || es->es_lblk > end)
__es_insert_extent(inode, &newes);
write_unlock(&EXT4_I(inode)->i_es_lock);
}
/*
* ext4_es_lookup_extent() looks up an extent in extent status tree.
*
......@@ -871,23 +904,6 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
return err;
}
int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex)
{
ext4_lblk_t ee_block;
ext4_fsblk_t ee_pblock;
unsigned int ee_len;
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
ee_pblock = ext4_ext_pblock(ex);
if (ee_len == 0)
return 0;
return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
EXTENT_STATUS_WRITTEN);
}
static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
struct list_head *b)
{
......@@ -895,6 +911,12 @@ static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
eia = list_entry(a, struct ext4_inode_info, i_es_lru);
eib = list_entry(b, struct ext4_inode_info, i_es_lru);
if (ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
!ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
return 1;
if (!ext4_test_inode_state(&eia->vfs_inode, EXT4_STATE_EXT_PRECACHED) &&
ext4_test_inode_state(&eib->vfs_inode, EXT4_STATE_EXT_PRECACHED))
return -1;
if (eia->i_touch_when == eib->i_touch_when)
return 0;
if (time_after(eia->i_touch_when, eib->i_touch_when))
......@@ -908,21 +930,13 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
{
struct ext4_inode_info *ei;
struct list_head *cur, *tmp;
LIST_HEAD(skiped);
LIST_HEAD(skipped);
int ret, nr_shrunk = 0;
int retried = 0, skip_precached = 1, nr_skipped = 0;
spin_lock(&sbi->s_es_lru_lock);
/*
* If the inode that is at the head of LRU list is newer than
* last_sorted time, that means that we need to sort this list.
*/
ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru);
if (sbi->s_es_last_sorted < ei->i_touch_when) {
list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
sbi->s_es_last_sorted = jiffies;
}
retry:
list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
/*
* If we have already reclaimed all extents from extent
......@@ -933,9 +947,16 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
/* Skip the inode that is newer than the last_sorted time */
if (sbi->s_es_last_sorted < ei->i_touch_when) {
list_move_tail(cur, &skiped);
/*
* Skip the inode that is newer than the last_sorted
* time. Normally we try hard to avoid shrinking
* precached inodes, but we will as a last resort.
*/
if ((sbi->s_es_last_sorted < ei->i_touch_when) ||
(skip_precached && ext4_test_inode_state(&ei->vfs_inode,
EXT4_STATE_EXT_PRECACHED))) {
nr_skipped++;
list_move_tail(cur, &skipped);
continue;
}
......@@ -955,11 +976,33 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
}
/* Move the newer inodes into the tail of the LRU list. */
list_splice_tail(&skiped, &sbi->s_es_lru);
list_splice_tail(&skipped, &sbi->s_es_lru);
INIT_LIST_HEAD(&skipped);
/*
* If we skipped any inodes, and we weren't able to make any
* forward progress, sort the list and try again.
*/
if ((nr_shrunk == 0) && nr_skipped && !retried) {
retried++;
list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
sbi->s_es_last_sorted = jiffies;
ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
i_es_lru);
/*
* If there are no non-precached inodes left on the
* list, start releasing precached extents.
*/
if (ext4_test_inode_state(&ei->vfs_inode,
EXT4_STATE_EXT_PRECACHED))
skip_precached = 0;
goto retry;
}
spin_unlock(&sbi->s_es_lru_lock);
if (locked_ei && nr_shrunk == 0)
nr_shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
return nr_shrunk;
}
......@@ -1034,10 +1077,16 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
struct rb_node *node;
struct extent_status *es;
int nr_shrunk = 0;
static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
if (ei->i_es_lru_nr == 0)
return 0;
if (ext4_test_inode_state(inode, EXT4_STATE_EXT_PRECACHED) &&
__ratelimit(&_rs))
ext4_warning(inode->i_sb, "forced shrink of precached extents");
node = rb_first(&tree->root);
while (node != NULL) {
es = rb_entry(node, struct extent_status, rb_node);
......
......@@ -29,16 +29,26 @@
/*
* These flags live in the high bits of extent_status.es_pblk
*/
#define EXTENT_STATUS_WRITTEN (1ULL << 63)
#define EXTENT_STATUS_UNWRITTEN (1ULL << 62)
#define EXTENT_STATUS_DELAYED (1ULL << 61)
#define EXTENT_STATUS_HOLE (1ULL << 60)
#define ES_SHIFT 60
#define EXTENT_STATUS_WRITTEN (1 << 3)
#define EXTENT_STATUS_UNWRITTEN (1 << 2)
#define EXTENT_STATUS_DELAYED (1 << 1)
#define EXTENT_STATUS_HOLE (1 << 0)
#define EXTENT_STATUS_FLAGS (EXTENT_STATUS_WRITTEN | \
EXTENT_STATUS_UNWRITTEN | \
EXTENT_STATUS_DELAYED | \
EXTENT_STATUS_HOLE)
#define ES_WRITTEN (1ULL << 63)
#define ES_UNWRITTEN (1ULL << 62)
#define ES_DELAYED (1ULL << 61)
#define ES_HOLE (1ULL << 60)
#define ES_MASK (ES_WRITTEN | ES_UNWRITTEN | \
ES_DELAYED | ES_HOLE)
struct ext4_sb_info;
struct ext4_extent;
......@@ -60,7 +70,10 @@ extern void ext4_es_init_tree(struct ext4_es_tree *tree);
extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, ext4_fsblk_t pblk,
unsigned long long status);
unsigned int status);
extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, ext4_fsblk_t pblk,
unsigned int status);
extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len);
extern void ext4_es_find_delayed_extent_range(struct inode *inode,
......@@ -68,36 +81,35 @@ extern void ext4_es_find_delayed_extent_range(struct inode *inode,
struct extent_status *es);
extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
struct extent_status *es);
extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex);
static inline int ext4_es_is_written(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_WRITTEN) != 0;
return (es->es_pblk & ES_WRITTEN) != 0;
}
static inline int ext4_es_is_unwritten(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_UNWRITTEN) != 0;
return (es->es_pblk & ES_UNWRITTEN) != 0;
}
static inline int ext4_es_is_delayed(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_DELAYED) != 0;
return (es->es_pblk & ES_DELAYED) != 0;
}
static inline int ext4_es_is_hole(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_HOLE) != 0;
return (es->es_pblk & ES_HOLE) != 0;
}
static inline ext4_fsblk_t ext4_es_status(struct extent_status *es)
static inline unsigned int ext4_es_status(struct extent_status *es)
{
return (es->es_pblk & EXTENT_STATUS_FLAGS);
return es->es_pblk >> ES_SHIFT;
}
static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
{
return (es->es_pblk & ~EXTENT_STATUS_FLAGS);
return es->es_pblk & ~ES_MASK;
}
static inline void ext4_es_store_pblock(struct extent_status *es,
......@@ -105,19 +117,16 @@ static inline void ext4_es_store_pblock(struct extent_status *es,
{
ext4_fsblk_t block;
block = (pb & ~EXTENT_STATUS_FLAGS) |
(es->es_pblk & EXTENT_STATUS_FLAGS);
block = (pb & ~ES_MASK) | (es->es_pblk & ES_MASK);
es->es_pblk = block;
}
static inline void ext4_es_store_status(struct extent_status *es,
unsigned long long status)
unsigned int status)
{
ext4_fsblk_t block;
block = (status & EXTENT_STATUS_FLAGS) |
(es->es_pblk & ~EXTENT_STATUS_FLAGS);
es->es_pblk = block;
es->es_pblk = (((ext4_fsblk_t)
(status & EXTENT_STATUS_FLAGS) << ES_SHIFT) |
(es->es_pblk & ~ES_MASK));
}
extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi);
......
......@@ -70,18 +70,16 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
ext4_group_t block_group,
struct ext4_group_desc *gdp)
{
struct ext4_group_info *grp;
J_ASSERT_BH(bh, buffer_locked(bh));
/* If checksum is bad mark all blocks and inodes use to prevent
* allocation, essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
ext4_error(sb, "Checksum bad for group %u", block_group);
ext4_free_group_clusters_set(sb, gdp, 0);
ext4_free_inodes_set(sb, gdp, 0);
ext4_itable_unused_set(sb, gdp, 0);
memset(bh->b_data, 0xff, sb->s_blocksize);
ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh,
EXT4_INODES_PER_GROUP(sb) / 8);
grp = ext4_get_group_info(sb, block_group);
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
return 0;
}
......@@ -117,6 +115,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
struct ext4_group_desc *desc;
struct buffer_head *bh = NULL;
ext4_fsblk_t bitmap_blk;
struct ext4_group_info *grp;
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
......@@ -185,6 +184,8 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
put_bh(bh);
ext4_error(sb, "Corrupt inode bitmap - block_group = %u, "
"inode_bitmap = %llu", block_group, bitmap_blk);
grp = ext4_get_group_info(sb, block_group);
set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
return NULL;
}
ext4_unlock_group(sb, block_group);
......@@ -221,6 +222,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
struct ext4_super_block *es;
struct ext4_sb_info *sbi;
int fatal = 0, err, count, cleared;
struct ext4_group_info *grp;
if (!sb) {
printk(KERN_ERR "EXT4-fs: %s:%d: inode on "
......@@ -266,7 +268,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
if (!bitmap_bh)
/* Don't bother if the inode bitmap is corrupt. */
grp = ext4_get_group_info(sb, block_group);
if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) || !bitmap_bh)
goto error_return;
BUFFER_TRACE(bitmap_bh, "get_write_access");
......@@ -315,8 +319,10 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (!fatal)
fatal = err;
} else
} else {
ext4_error(sb, "bit already cleared for inode %lu", ino);
set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
}
error_return:
brelse(bitmap_bh);
......@@ -624,6 +630,51 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
return -1;
}
/*
* In no journal mode, if an inode has recently been deleted, we want
* to avoid reusing it until we're reasonably sure the inode table
* block has been written back to disk. (Yes, these values are
* somewhat arbitrary...)
*/
#define RECENTCY_MIN 5
#define RECENTCY_DIRTY 30
static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
{
struct ext4_group_desc *gdp;
struct ext4_inode *raw_inode;
struct buffer_head *bh;
unsigned long dtime, now;
int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
int offset, ret = 0, recentcy = RECENTCY_MIN;
gdp = ext4_get_group_desc(sb, group, NULL);
if (unlikely(!gdp))
return 0;
bh = sb_getblk(sb, ext4_inode_table(sb, gdp) +
(ino / inodes_per_block));
if (unlikely(!bh) || !buffer_uptodate(bh))
/*
* If the block is not in the buffer cache, then it
* must have been written out.
*/
goto out;
offset = (ino % inodes_per_block) * EXT4_INODE_SIZE(sb);
raw_inode = (struct ext4_inode *) (bh->b_data + offset);
dtime = le32_to_cpu(raw_inode->i_dtime);
now = get_seconds();
if (buffer_dirty(bh))
recentcy += RECENTCY_DIRTY;
if (dtime && (dtime < now) && (now < dtime + recentcy))
ret = 1;
out:
brelse(bh);
return ret;
}
/*
* There are two policies for allocating an inode. If the new inode is
* a directory, then a forward search is made for a block group with both
......@@ -652,6 +703,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
struct inode *ret;
ext4_group_t i;
ext4_group_t flex_group;
struct ext4_group_info *grp;
/* Cannot create files in a deleted directory */
if (!dir || !dir->i_nlink)
......@@ -725,10 +777,22 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
continue;
}
grp = ext4_get_group_info(sb, group);
/* Skip groups with already-known suspicious inode tables */
if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
if (++group == ngroups)
group = 0;
continue;
}
brelse(inode_bitmap_bh);
inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
if (!inode_bitmap_bh)
goto out;
/* Skip groups with suspicious inode tables */
if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) || !inode_bitmap_bh) {
if (++group == ngroups)
group = 0;
continue;
}
repeat_in_this_group:
ino = ext4_find_next_zero_bit((unsigned long *)
......@@ -741,6 +805,11 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
"inode=%lu", ino + 1);
continue;
}
if ((EXT4_SB(sb)->s_journal == NULL) &&
recently_deleted(sb, group, ino)) {
ino++;
goto next_inode;
}
if (!handle) {
BUG_ON(nblocks <= 0);
handle = __ext4_journal_start_sb(dir->i_sb, line_no,
......@@ -764,6 +833,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
ino++; /* the inode bitmap is zero-based */
if (!ret2)
goto got; /* we grabbed the inode! */
next_inode:
if (ino < EXT4_INODES_PER_GROUP(sb))
goto repeat_in_this_group;
next_group:
......
......@@ -23,7 +23,6 @@
#include <linux/aio.h>
#include "ext4_jbd2.h"
#include "truncate.h"
#include "ext4_extents.h" /* Needed for EXT_MAX_BLOCKS */
#include <trace/events/ext4.h>
......
This diff is collapsed.
......@@ -17,7 +17,6 @@
#include <asm/uaccess.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "ext4_extents.h"
#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
......@@ -624,6 +623,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return 0;
}
case EXT4_IOC_PRECACHE_EXTENTS:
return ext4_ext_precache(inode);
default:
return -ENOTTY;
......@@ -688,6 +689,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case EXT4_IOC_MOVE_EXT:
case FITRIM:
case EXT4_IOC_RESIZE_FS:
case EXT4_IOC_PRECACHE_EXTENTS:
break;
default:
return -ENOIOCTLCMD;
......
......@@ -751,13 +751,15 @@ void ext4_mb_generate_buddy(struct super_block *sb,
if (free != grp->bb_free) {
ext4_grp_locked_error(sb, group, 0, 0,
"%u clusters in bitmap, %u in gd",
"%u clusters in bitmap, %u in gd; "
"block bitmap corrupt.",
free, grp->bb_free);
/*
* If we intent to continue, we consider group descritor
* If we intend to continue, we consider group descriptor
* corrupt and update bb_free using bitmap value
*/
grp->bb_free = free;
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
}
mb_set_largest_free_order(sb, grp);
......@@ -1398,6 +1400,10 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
BUG_ON(last >= (sb->s_blocksize << 3));
assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
/* Don't bother if the block group is corrupt. */
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
return;
mb_check_buddy(e4b);
mb_free_blocks_double(inode, e4b, first, count);
......@@ -1423,7 +1429,11 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
inode ? inode->i_ino : 0,
blocknr,
"freeing already freed block "
"(bit %u)", block);
"(bit %u); block bitmap corrupt.",
block);
/* Mark the block group as corrupt. */
set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
&e4b->bd_info->bb_state);
mb_regenerate_buddy(e4b);
goto done;
}
......@@ -1790,6 +1800,11 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
if (err)
return err;
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
ext4_mb_unload_buddy(e4b);
return 0;
}
ext4_lock_group(ac->ac_sb, group);
max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
ac->ac_g_ex.fe_len, &ex);
......@@ -1987,6 +2002,9 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
if (cr <= 2 && free < ac->ac_g_ex.fe_len)
return 0;
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
return 0;
/* We only do this if the grp has never been initialized */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
int ret = ext4_mb_init_group(ac->ac_sb, group);
......@@ -4585,6 +4603,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *gd_bh;
ext4_group_t block_group;
struct ext4_sb_info *sbi;
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_buddy e4b;
unsigned int count_clusters;
int err = 0;
......@@ -4673,6 +4692,10 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
overflow = 0;
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
ext4_get_group_info(sb, block_group))))
return;
/*
* Check to see if we are freeing blocks across a group
* boundary.
......@@ -4784,7 +4807,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
ext4_group_desc_csum_set(sb, block_group, gdp);
ext4_unlock_group(sb, block_group);
percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
......@@ -4792,10 +4814,23 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
&sbi->s_flex_groups[flex_group].free_clusters);
}
ext4_mb_unload_buddy(&e4b);
if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
if (flags & EXT4_FREE_BLOCKS_RESERVE && ei->i_reserved_data_blocks) {
percpu_counter_add(&sbi->s_dirtyclusters_counter,
count_clusters);
spin_lock(&ei->i_block_reservation_lock);
if (flags & EXT4_FREE_BLOCKS_METADATA)
ei->i_reserved_meta_blocks += count_clusters;
else
ei->i_reserved_data_blocks += count_clusters;
spin_unlock(&ei->i_block_reservation_lock);
if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
dquot_reclaim_block(inode,
EXT4_C2B(sbi, count_clusters));
} else if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
ext4_mb_unload_buddy(&e4b);
/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
......
......@@ -39,7 +39,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
newext.ee_block = cpu_to_le32(lb->first_block);
newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
ext4_ext_store_pblock(&newext, lb->first_pblock);
path = ext4_ext_find_extent(inode, lb->first_block, NULL);
path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0);
if (IS_ERR(path)) {
retval = PTR_ERR(path);
......@@ -494,7 +494,7 @@ int ext4_ext_migrate(struct inode *inode)
* superblock modification.
*
* For the tmp_inode we already have committed the
* trascation that created the inode. Later as and
* transaction that created the inode. Later as and
* when we add extents we extent the journal
*/
/*
......
......@@ -37,7 +37,7 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock,
int ret = 0;
struct ext4_ext_path *path;
path = ext4_ext_find_extent(inode, lblock, *orig_path);
path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE);
if (IS_ERR(path))
ret = PTR_ERR(path);
else if (path[ext_depth(inode)].p_ext == NULL)
......
......@@ -3005,15 +3005,19 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
/*
* Anybody can rename anything with this: the permission checks are left to the
* higher-level routines.
*
* n.b. old_{dentry,inode) refers to the source dentry/inode
* while new_{dentry,inode) refers to the destination dentry/inode
* This comes from rename(const char *oldpath, const char *newpath)
*/
static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
handle_t *handle;
handle_t *handle = NULL;
struct inode *old_inode, *new_inode;
struct buffer_head *old_bh, *new_bh, *dir_bh;
struct ext4_dir_entry_2 *old_de, *new_de;
int retval, force_da_alloc = 0;
int retval;
int inlined = 0, new_inlined = 0;
struct ext4_dir_entry_2 *parent_de;
......@@ -3026,14 +3030,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
* in separate transaction */
if (new_dentry->d_inode)
dquot_initialize(new_dentry->d_inode);
handle = ext4_journal_start(old_dir, EXT4_HT_DIR,
(2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
if (IS_ERR(handle))
return PTR_ERR(handle);
if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
ext4_handle_sync(handle);
old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de, NULL);
/*
......@@ -3056,6 +3052,18 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
new_bh = NULL;
}
}
if (new_inode && !test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
ext4_alloc_da_blocks(old_inode);
handle = ext4_journal_start(old_dir, EXT4_HT_DIR,
(2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
if (IS_ERR(handle))
return PTR_ERR(handle);
if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
ext4_handle_sync(handle);
if (S_ISDIR(old_inode->i_mode)) {
if (new_inode) {
retval = -ENOTEMPTY;
......@@ -3186,8 +3194,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
ext4_mark_inode_dirty(handle, new_inode);
if (!new_inode->i_nlink)
ext4_orphan_add(handle, new_inode);
if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
force_da_alloc = 1;
}
retval = 0;
......@@ -3195,9 +3201,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
brelse(dir_bh);
brelse(old_bh);
brelse(new_bh);
ext4_journal_stop(handle);
if (retval == 0 && force_da_alloc)
ext4_alloc_da_blocks(old_inode);
if (handle)
ext4_journal_stop(handle);
return retval;
}
......
......@@ -1134,8 +1134,8 @@ enum {
Opt_nouid32, Opt_debug, Opt_removed,
Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit,
Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
Opt_data_err_abort, Opt_data_err_ignore,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
......@@ -1179,6 +1179,7 @@ static const match_table_t tokens = {
{Opt_min_batch_time, "min_batch_time=%u"},
{Opt_max_batch_time, "max_batch_time=%u"},
{Opt_journal_dev, "journal_dev=%u"},
{Opt_journal_path, "journal_path=%s"},
{Opt_journal_checksum, "journal_checksum"},
{Opt_journal_async_commit, "journal_async_commit"},
{Opt_abort, "abort"},
......@@ -1338,6 +1339,7 @@ static int clear_qf_name(struct super_block *sb, int qtype)
#define MOPT_NO_EXT2 0x0100
#define MOPT_NO_EXT3 0x0200
#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING 0x0400
static const struct mount_opts {
int token;
......@@ -1387,6 +1389,7 @@ static const struct mount_opts {
{Opt_resuid, 0, MOPT_GTE0},
{Opt_resgid, 0, MOPT_GTE0},
{Opt_journal_dev, 0, MOPT_GTE0},
{Opt_journal_path, 0, MOPT_STRING},
{Opt_journal_ioprio, 0, MOPT_GTE0},
{Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
{Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
......@@ -1480,7 +1483,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
return -1;
}
if (args->from && match_int(args, &arg))
if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg))
return -1;
if (args->from && (m->flags & MOPT_GTE0) && (arg < 0))
return -1;
......@@ -1544,6 +1547,44 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
return -1;
}
*journal_devnum = arg;
} else if (token == Opt_journal_path) {
char *journal_path;
struct inode *journal_inode;
struct path path;
int error;
if (is_remount) {
ext4_msg(sb, KERN_ERR,
"Cannot specify journal on remount");
return -1;
}
journal_path = match_strdup(&args[0]);
if (!journal_path) {
ext4_msg(sb, KERN_ERR, "error: could not dup "
"journal device string");
return -1;
}
error = kern_path(journal_path, LOOKUP_FOLLOW, &path);
if (error) {
ext4_msg(sb, KERN_ERR, "error: could not find "
"journal device path: error %d", error);
kfree(journal_path);
return -1;
}
journal_inode = path.dentry->d_inode;
if (!S_ISBLK(journal_inode->i_mode)) {
ext4_msg(sb, KERN_ERR, "error: journal path %s "
"is not a block device", journal_path);
path_put(&path);
kfree(journal_path);
return -1;
}
*journal_devnum = new_encode_dev(journal_inode->i_rdev);
path_put(&path);
kfree(journal_path);
} else if (token == Opt_journal_ioprio) {
if (arg > 7) {
ext4_msg(sb, KERN_ERR, "Invalid journal IO priority"
......
......@@ -343,14 +343,14 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
struct page *page = bh->b_page;
__u8 *addr;
__u32 csum32;
__be32 seq;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
return;
sequence = cpu_to_be32(sequence);
seq = cpu_to_be32(sequence);
addr = kmap_atomic(page);
csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence,
sizeof(sequence));
csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data),
bh->b_size);
kunmap_atomic(addr);
......
......@@ -130,9 +130,10 @@ int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
}
static __u32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
{
__u32 csum, old_csum;
__u32 csum;
__be32 old_csum;
old_csum = sb->s_checksum;
sb->s_checksum = 0;
......
......@@ -178,7 +178,8 @@ static int jbd2_descr_block_csum_verify(journal_t *j,
void *buf)
{
struct jbd2_journal_block_tail *tail;
__u32 provided, calculated;
__be32 provided;
__u32 calculated;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
return 1;
......@@ -190,8 +191,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j,
calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
tail->t_checksum = provided;
provided = be32_to_cpu(provided);
return provided == calculated;
return provided == cpu_to_be32(calculated);
}
/*
......@@ -381,7 +381,8 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh,
static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
{
struct commit_header *h;
__u32 provided, calculated;
__be32 provided;
__u32 calculated;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
return 1;
......@@ -392,21 +393,20 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
h->h_chksum[0] = provided;
provided = be32_to_cpu(provided);
return provided == calculated;
return provided == cpu_to_be32(calculated);
}
static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
void *buf, __u32 sequence)
{
__u32 csum32;
__be32 seq;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
return 1;
sequence = cpu_to_be32(sequence);
csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence,
sizeof(sequence));
seq = cpu_to_be32(sequence);
csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
return tag->t_checksum == cpu_to_be16(csum32);
......@@ -808,7 +808,8 @@ static int jbd2_revoke_block_csum_verify(journal_t *j,
void *buf)
{
struct jbd2_journal_revoke_tail *tail;
__u32 provided, calculated;
__be32 provided;
__u32 calculated;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
return 1;
......@@ -820,8 +821,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j,
calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
tail->r_checksum = provided;
provided = be32_to_cpu(provided);
return provided == calculated;
return provided == cpu_to_be32(calculated);
}
/* Scan a revoke record, marking all blocks mentioned as revoked. */
......
......@@ -1094,6 +1094,14 @@ static void dquot_claim_reserved_space(struct dquot *dquot, qsize_t number)
dquot->dq_dqb.dqb_rsvspace -= number;
}
static void dquot_reclaim_reserved_space(struct dquot *dquot, qsize_t number)
{
if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number))
number = dquot->dq_dqb.dqb_curspace;
dquot->dq_dqb.dqb_rsvspace += number;
dquot->dq_dqb.dqb_curspace -= number;
}
static inline
void dquot_free_reserved_space(struct dquot *dquot, qsize_t number)
{
......@@ -1528,6 +1536,15 @@ void inode_claim_rsv_space(struct inode *inode, qsize_t number)
}
EXPORT_SYMBOL(inode_claim_rsv_space);
void inode_reclaim_rsv_space(struct inode *inode, qsize_t number)
{
spin_lock(&inode->i_lock);
*inode_reserved_space(inode) += number;
__inode_sub_bytes(inode, number);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(inode_reclaim_rsv_space);
void inode_sub_rsv_space(struct inode *inode, qsize_t number)
{
spin_lock(&inode->i_lock);
......@@ -1701,6 +1718,35 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
}
EXPORT_SYMBOL(dquot_claim_space_nodirty);
/*
* Convert allocated space back to in-memory reserved quotas
*/
void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
{
int cnt;
if (!dquot_active(inode)) {
inode_reclaim_rsv_space(inode, number);
return;
}
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
spin_lock(&dq_data_lock);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (inode->i_dquot[cnt])
dquot_reclaim_reserved_space(inode->i_dquot[cnt],
number);
}
/* Update inode bytes */
inode_reclaim_rsv_space(inode, number);
spin_unlock(&dq_data_lock);
mark_all_dquot_dirty(inode->i_dquot);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return;
}
EXPORT_SYMBOL(dquot_reclaim_space_nodirty);
/*
* This operation can block, but only after everything is updated
*/
......
......@@ -447,9 +447,8 @@ void inode_add_bytes(struct inode *inode, loff_t bytes)
EXPORT_SYMBOL(inode_add_bytes);
void inode_sub_bytes(struct inode *inode, loff_t bytes)
void __inode_sub_bytes(struct inode *inode, loff_t bytes)
{
spin_lock(&inode->i_lock);
inode->i_blocks -= bytes >> 9;
bytes &= 511;
if (inode->i_bytes < bytes) {
......@@ -457,6 +456,14 @@ void inode_sub_bytes(struct inode *inode, loff_t bytes)
inode->i_bytes += 512;
}
inode->i_bytes -= bytes;
}
EXPORT_SYMBOL(__inode_sub_bytes);
void inode_sub_bytes(struct inode *inode, loff_t bytes)
{
spin_lock(&inode->i_lock);
__inode_sub_bytes(inode, bytes);
spin_unlock(&inode->i_lock);
}
......
......@@ -2503,6 +2503,7 @@ extern void generic_fillattr(struct inode *, struct kstat *);
extern int vfs_getattr(struct path *, struct kstat *);
void __inode_add_bytes(struct inode *inode, loff_t bytes);
void inode_add_bytes(struct inode *inode, loff_t bytes);
void __inode_sub_bytes(struct inode *inode, loff_t bytes);
void inode_sub_bytes(struct inode *inode, loff_t bytes);
loff_t inode_get_bytes(struct inode *inode);
void inode_set_bytes(struct inode *inode, loff_t bytes);
......
......@@ -41,6 +41,7 @@ void __quota_error(struct super_block *sb, const char *func,
void inode_add_rsv_space(struct inode *inode, qsize_t number);
void inode_claim_rsv_space(struct inode *inode, qsize_t number);
void inode_sub_rsv_space(struct inode *inode, qsize_t number);
void inode_reclaim_rsv_space(struct inode *inode, qsize_t number);
void dquot_initialize(struct inode *inode);
void dquot_drop(struct inode *inode);
......@@ -59,6 +60,7 @@ int dquot_alloc_inode(const struct inode *inode);
int dquot_claim_space_nodirty(struct inode *inode, qsize_t number);
void dquot_free_inode(const struct inode *inode);
void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number);
int dquot_disable(struct super_block *sb, int type, unsigned int flags);
/* Suspend quotas on remount RO */
......@@ -238,6 +240,13 @@ static inline int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
return 0;
}
static inline int dquot_reclaim_space_nodirty(struct inode *inode,
qsize_t number)
{
inode_sub_bytes(inode, number);
return 0;
}
static inline int dquot_disable(struct super_block *sb, int type,
unsigned int flags)
{
......@@ -336,6 +345,12 @@ static inline int dquot_claim_block(struct inode *inode, qsize_t nr)
return ret;
}
static inline void dquot_reclaim_block(struct inode *inode, qsize_t nr)
{
dquot_reclaim_space_nodirty(inode, nr << inode->i_blkbits);
mark_inode_dirty_sync(inode);
}
static inline void dquot_free_space_nodirty(struct inode *inode, qsize_t nr)
{
__dquot_free_space(inode, nr, 0);
......
......@@ -14,7 +14,6 @@ struct ext4_prealloc_space;
struct ext4_inode_info;
struct mpage_da_data;
struct ext4_map_blocks;
struct ext4_extent;
struct extent_status;
#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode))
......@@ -64,10 +63,10 @@ struct extent_status;
{ EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER, "LAST_CLUSTER" })
#define show_extent_status(status) __print_flags(status, "", \
{ (1 << 3), "W" }, \
{ (1 << 2), "U" }, \
{ (1 << 1), "D" }, \
{ (1 << 0), "H" })
{ EXTENT_STATUS_WRITTEN, "W" }, \
{ EXTENT_STATUS_UNWRITTEN, "U" }, \
{ EXTENT_STATUS_DELAYED, "D" }, \
{ EXTENT_STATUS_HOLE, "H" })
TRACE_EVENT(ext4_free_inode,
......@@ -2192,7 +2191,7 @@ TRACE_EVENT(ext4_ext_remove_space_done,
(unsigned short) __entry->eh_entries)
);
TRACE_EVENT(ext4_es_insert_extent,
DECLARE_EVENT_CLASS(ext4__es_extent,
TP_PROTO(struct inode *inode, struct extent_status *es),
TP_ARGS(inode, es),
......@@ -2212,7 +2211,7 @@ TRACE_EVENT(ext4_es_insert_extent,
__entry->lblk = es->es_lblk;
__entry->len = es->es_len;
__entry->pblk = ext4_es_pblock(es);
__entry->status = ext4_es_status(es) >> 60;
__entry->status = ext4_es_status(es);
),
TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s",
......@@ -2222,6 +2221,18 @@ TRACE_EVENT(ext4_es_insert_extent,
__entry->pblk, show_extent_status(__entry->status))
);
DEFINE_EVENT(ext4__es_extent, ext4_es_insert_extent,
TP_PROTO(struct inode *inode, struct extent_status *es),
TP_ARGS(inode, es)
);
DEFINE_EVENT(ext4__es_extent, ext4_es_cache_extent,
TP_PROTO(struct inode *inode, struct extent_status *es),
TP_ARGS(inode, es)
);
TRACE_EVENT(ext4_es_remove_extent,
TP_PROTO(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len),
......@@ -2289,7 +2300,7 @@ TRACE_EVENT(ext4_es_find_delayed_extent_range_exit,
__entry->lblk = es->es_lblk;
__entry->len = es->es_len;
__entry->pblk = ext4_es_pblock(es);
__entry->status = ext4_es_status(es) >> 60;
__entry->status = ext4_es_status(es);
),
TP_printk("dev %d,%d ino %lu es [%u/%u) mapped %llu status %s",
......@@ -2343,7 +2354,7 @@ TRACE_EVENT(ext4_es_lookup_extent_exit,
__entry->lblk = es->es_lblk;
__entry->len = es->es_len;
__entry->pblk = ext4_es_pblock(es);
__entry->status = ext4_es_status(es) >> 60;
__entry->status = ext4_es_status(es);
__entry->found = found;
),
......
......@@ -40,6 +40,7 @@ struct fiemap {
#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */
#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */
#define FIEMAP_FLAG_CACHE 0x00000004 /* request caching of the extents */
#define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment