Commit 1712a699 authored by Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: Add new "development flag" to the ext4 filesystem
  ext4: Don't panic in case of corrupt bitmap
  ext4: allocate struct ext4_allocation_context from a kmem cache
  JBD2:  Clear buffer_ordered flag for barried IO request on success
  ext4: Fix Direct I/O locking
  ext4: Fix circular locking dependency with migrate and rm.
  allow in-inode EAs on ext4 root inode
  ext4: Fix null bh pointer dereference in mballoc
  ext4: Don't set EXTENTS_FL flag for fast symlinks
  JBD2: Use the incompat macro for testing the incompat feature.
  jbd2: Fix reference counting on the journal commit block's buffer head
  [PATCH] jbd: Remove useless loop when writing commit record
  jbd2: Add error check to journal_wait_on_commit_record to avoid oops
parents 95a940e9 469108ff
......@@ -892,7 +892,16 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
return err;
}
#define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32)
/* Maximum number of blocks we map for direct IO at once. */
#define DIO_MAX_BLOCKS 4096
/*
* Number of credits we need for writing DIO_MAX_BLOCKS:
* We need sb + group descriptor + bitmap + inode -> 4
* For B blocks with A block pointers per block we need:
* 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect).
* If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25.
*/
#define DIO_CREDITS 25
int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
unsigned long max_blocks, struct buffer_head *bh,
......@@ -939,49 +948,31 @@ static int ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
handle_t *handle = ext4_journal_current_handle();
int ret = 0;
int ret = 0, started = 0;
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
if (!create)
goto get_block; /* A read */
if (max_blocks == 1)
goto get_block; /* A single block get */
if (handle->h_transaction->t_state == T_LOCKED) {
/*
* Huge direct-io writes can hold off commits for long
* periods of time. Let this commit run.
*/
ext4_journal_stop(handle);
handle = ext4_journal_start(inode, DIO_CREDITS);
if (IS_ERR(handle))
if (create && !handle) {
/* Direct IO write... */
if (max_blocks > DIO_MAX_BLOCKS)
max_blocks = DIO_MAX_BLOCKS;
handle = ext4_journal_start(inode, DIO_CREDITS +
2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto get_block;
}
if (handle->h_buffer_credits <= EXT4_RESERVE_TRANS_BLOCKS) {
/*
* Getting low on buffer credits...
*/
ret = ext4_journal_extend(handle, DIO_CREDITS);
if (ret > 0) {
/*
* Couldn't extend the transaction. Start a new one.
*/
ret = ext4_journal_restart(handle, DIO_CREDITS);
goto out;
}
started = 1;
}
get_block:
if (ret == 0) {
ret = ext4_get_blocks_wrap(handle, inode, iblock,
ret = ext4_get_blocks_wrap(handle, inode, iblock,
max_blocks, bh_result, create, 0);
if (ret > 0) {
bh_result->b_size = (ret << inode->i_blkbits);
ret = 0;
}
if (ret > 0) {
bh_result->b_size = (ret << inode->i_blkbits);
ret = 0;
}
if (started)
ext4_journal_stop(handle);
out:
return ret;
}
......@@ -1671,7 +1662,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
* if the machine crashes during the write.
*
* If the O_DIRECT write is instantiating holes inside i_size and the machine
* crashes then stale disk data _may_ be exposed inside the file.
* crashes then stale disk data _may_ be exposed inside the file. But current
* VFS code falls back into buffered path in that case so we are safe.
*/
static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset,
......@@ -1680,7 +1672,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
struct ext4_inode_info *ei = EXT4_I(inode);
handle_t *handle = NULL;
handle_t *handle;
ssize_t ret;
int orphan = 0;
size_t count = iov_length(iov, nr_segs);
......@@ -1688,17 +1680,21 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
if (rw == WRITE) {
loff_t final_size = offset + count;
handle = ext4_journal_start(inode, DIO_CREDITS);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
}
if (final_size > inode->i_size) {
/* Credits for sb + inode write */
handle = ext4_journal_start(inode, 2);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
}
ret = ext4_orphan_add(handle, inode);
if (ret)
goto out_stop;
if (ret) {
ext4_journal_stop(handle);
goto out;
}
orphan = 1;
ei->i_disksize = inode->i_size;
ext4_journal_stop(handle);
}
}
......@@ -1706,18 +1702,21 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
offset, nr_segs,
ext4_get_block, NULL);
/*
* Reacquire the handle: ext4_get_block() can restart the transaction
*/
handle = ext4_journal_current_handle();
out_stop:
if (handle) {
if (orphan) {
int err;
if (orphan && inode->i_nlink)
/* Credits for sb + inode write */
handle = ext4_journal_start(inode, 2);
if (IS_ERR(handle)) {
/* This is really bad luck. We've written the data
* but cannot extend i_size. Bail out and pretend
* the write failed... */
ret = PTR_ERR(handle);
goto out;
}
if (inode->i_nlink)
ext4_orphan_del(handle, inode);
if (orphan && ret > 0) {
if (ret > 0) {
loff_t end = offset + ret;
if (end > inode->i_size) {
ei->i_disksize = end;
......@@ -2758,13 +2757,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ei->i_data[block] = raw_inode->i_block[block];
INIT_LIST_HEAD(&ei->i_orphan);
if (inode->i_ino >= EXT4_FIRST_INO(inode->i_sb) + 1 &&
EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
/*
* When mke2fs creates big inodes it does not zero out
* the unused bytes above EXT4_GOOD_OLD_INODE_SIZE,
* so ignore those first few inodes.
*/
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
EXT4_INODE_SIZE(inode->i_sb)) {
......
This diff is collapsed.
......@@ -61,10 +61,9 @@ static int finish_range(handle_t *handle, struct inode *inode,
retval = ext4_journal_restart(handle, needed);
if (retval)
goto err_out;
}
if (needed) {
} else if (needed) {
retval = ext4_journal_extend(handle, needed);
if (retval != 0) {
if (retval) {
/*
* If we are not able to extend the journal, restart the journal
*/
......@@ -220,6 +219,26 @@ static int update_tind_extent_range(handle_t *handle, struct inode *inode,
}
/*
 * extend_credit_for_blkdel - top up journal credits before freeing a block.
 *
 * Does nothing (returns 0) while the handle still holds more than
 * EXT4_RESERVE_TRANS_BLOCKS buffer credits.  Otherwise asks for enough
 * credits to free one block: freeing touches the superblock, the group
 * descriptor and the block bitmap (3 credits), and may also update the
 * user and group quota (2 * EXT4_QUOTA_TRANS_BLOCKS).
 *
 * The handle is first extended in place; if ext4_journal_extend() reports
 * anything other than 0, the handle is restarted with the same number of
 * credits instead.
 *
 * Returns 0 when no top-up was needed or the extend succeeded, otherwise
 * whatever ext4_journal_restart() returned.
 */
static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
{
	int credits;

	/* Still comfortably above the reserve: nothing to do. */
	if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS)
		return 0;

	/* sb + group descriptor + block bitmap, plus user and group quota. */
	credits = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);

	if (ext4_journal_extend(handle, credits) == 0)
		return 0;

	/* Could not extend the running handle; restart it instead. */
	return ext4_journal_restart(handle, credits);
}
static int free_dind_blocks(handle_t *handle,
struct inode *inode, __le32 i_data)
{
......@@ -234,11 +253,14 @@ static int free_dind_blocks(handle_t *handle,
tmp_idata = (__le32 *)bh->b_data;
for (i = 0; i < max_entries; i++) {
if (tmp_idata[i])
if (tmp_idata[i]) {
extend_credit_for_blkdel(handle, inode);
ext4_free_blocks(handle, inode,
le32_to_cpu(tmp_idata[i]), 1, 1);
}
}
put_bh(bh);
extend_credit_for_blkdel(handle, inode);
ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
return 0;
}
......@@ -267,29 +289,32 @@ static int free_tind_blocks(handle_t *handle,
}
}
put_bh(bh);
extend_credit_for_blkdel(handle, inode);
ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
return 0;
}
static int free_ind_block(handle_t *handle, struct inode *inode)
static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
{
int retval;
struct ext4_inode_info *ei = EXT4_I(inode);
if (ei->i_data[EXT4_IND_BLOCK])
/* ei->i_data[EXT4_IND_BLOCK] */
if (i_data[0]) {
extend_credit_for_blkdel(handle, inode);
ext4_free_blocks(handle, inode,
le32_to_cpu(ei->i_data[EXT4_IND_BLOCK]), 1, 1);
le32_to_cpu(i_data[0]), 1, 1);
}
if (ei->i_data[EXT4_DIND_BLOCK]) {
retval = free_dind_blocks(handle, inode,
ei->i_data[EXT4_DIND_BLOCK]);
/* ei->i_data[EXT4_DIND_BLOCK] */
if (i_data[1]) {
retval = free_dind_blocks(handle, inode, i_data[1]);
if (retval)
return retval;
}
if (ei->i_data[EXT4_TIND_BLOCK]) {
retval = free_tind_blocks(handle, inode,
ei->i_data[EXT4_TIND_BLOCK]);
/* ei->i_data[EXT4_TIND_BLOCK] */
if (i_data[2]) {
retval = free_tind_blocks(handle, inode, i_data[2]);
if (retval)
return retval;
}
......@@ -297,15 +322,13 @@ static int free_ind_block(handle_t *handle, struct inode *inode)
}
static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
struct inode *tmp_inode, int retval)
struct inode *tmp_inode)
{
int retval;
__le32 i_data[3];
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_inode_info *tmp_ei = EXT4_I(tmp_inode);
retval = free_ind_block(handle, inode);
if (retval)
goto err_out;
/*
* One credit accounted for writing the
* i_data field of the original inode
......@@ -317,6 +340,11 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
goto err_out;
}
i_data[0] = ei->i_data[EXT4_IND_BLOCK];
i_data[1] = ei->i_data[EXT4_DIND_BLOCK];
i_data[2] = ei->i_data[EXT4_TIND_BLOCK];
down_write(&EXT4_I(inode)->i_data_sem);
/*
* We have the extent map build with the tmp inode.
* Now copy the i_data across
......@@ -336,8 +364,15 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
spin_lock(&inode->i_lock);
inode->i_blocks += tmp_inode->i_blocks;
spin_unlock(&inode->i_lock);
up_write(&EXT4_I(inode)->i_data_sem);
/*
* We mark the inode dirty after, because we decrement the
* i_blocks when freeing the indirect meta-data blocks
*/
retval = free_ind_block(handle, inode, i_data);
ext4_mark_inode_dirty(handle, inode);
err_out:
return retval;
}
......@@ -365,6 +400,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
}
}
put_bh(bh);
extend_credit_for_blkdel(handle, inode);
ext4_free_blocks(handle, inode, block, 1, 1);
return retval;
}
......@@ -414,7 +450,12 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
return -EINVAL;
down_write(&EXT4_I(inode)->i_data_sem);
if (S_ISLNK(inode->i_mode) && inode->i_blocks == 0)
/*
* don't migrate fast symlink
*/
return retval;
handle = ext4_journal_start(inode,
EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
......@@ -448,13 +489,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
ext4_orphan_add(handle, tmp_inode);
ext4_journal_stop(handle);
ei = EXT4_I(inode);
i_data = ei->i_data;
memset(&lb, 0, sizeof(lb));
/* 32 bit block address 4 bytes */
max_entries = inode->i_sb->s_blocksize >> 2;
/*
* start with one credit accounted for
* superblock modification.
......@@ -463,7 +497,20 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
* transaction that created the inode. Later as and
* when we add extents we extend the journal
*/
/*
* inode_mutex prevents write and truncate on the file. Reads still go
* through. We take i_data_sem in ext4_ext_swap_inode_data before we
* switch the inode format to prevent reads.
*/
mutex_lock(&(inode->i_mutex));
handle = ext4_journal_start(inode, 1);
ei = EXT4_I(inode);
i_data = ei->i_data;
memset(&lb, 0, sizeof(lb));
/* 32 bit block address 4 bytes */
max_entries = inode->i_sb->s_blocksize >> 2;
for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
if (i_data[i]) {
retval = update_extent_range(handle, tmp_inode,
......@@ -501,19 +548,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
*/
retval = finish_range(handle, tmp_inode, &lb);
err_out:
/*
* We are either freeing extent information or indirect
* blocks. During this we touch superblock, group descriptor
* and block bitmap. Later we mark the tmp_inode dirty
* via ext4_ext_tree_init. So allocate a credit of 4
* We may update quota (user and group).
*
* FIXME!! we may be touching bitmaps in different block groups.
*/
if (ext4_journal_extend(handle,
4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0)
ext4_journal_restart(handle,
4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
if (retval)
/*
* Failure case delete the extent information with the
......@@ -522,7 +556,11 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
free_ext_block(handle, tmp_inode);
else
retval = ext4_ext_swap_inode_data(handle, inode,
tmp_inode, retval);
tmp_inode);
/* We mark the tmp_inode dirty via ext4_ext_tree_init. */
if (ext4_journal_extend(handle, 1) != 0)
ext4_journal_restart(handle, 1);
/*
* Mark the tmp_inode as of size zero
......@@ -550,8 +588,7 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
tmp_inode->i_nlink = 0;
ext4_journal_stop(handle);
up_write(&EXT4_I(inode)->i_data_sem);
mutex_unlock(&(inode->i_mutex));
if (tmp_inode)
iput(tmp_inode);
......
......@@ -2223,6 +2223,7 @@ static int ext4_symlink (struct inode * dir,
inode->i_op = &ext4_fast_symlink_inode_operations;
memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
inode->i_size = l-1;
EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
}
EXT4_I(inode)->i_disksize = inode->i_size;
err = ext4_add_nondir(handle, dentry, inode);
......
......@@ -1919,6 +1919,17 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
printk(KERN_WARNING
"EXT4-fs warning: feature flags set on rev 0 fs, "
"running e2fsck is recommended\n");
/*
* Since ext4 is still considered development code, we require
* that the TEST_FILESYS flag in es->s_flags be set.
*/
if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) {
printk(KERN_WARNING "EXT4-fs: %s: not marked "
"OK to use with test code.\n", sb->s_id);
goto failed_mount;
}
/*
* Check feature flags regardless of the revision level, since we
* previously didn't change the revision level when setting the flags,
......
......@@ -104,7 +104,8 @@ static int journal_write_commit_record(journal_t *journal,
{
struct journal_head *descriptor;
struct buffer_head *bh;
int i, ret;
journal_header_t *header;
int ret;
int barrier_done = 0;
if (is_journal_aborted(journal))
......@@ -116,13 +117,10 @@ static int journal_write_commit_record(journal_t *journal,
bh = jh2bh(descriptor);
/* AKPM: buglet - add `i' to tmp! */
for (i = 0; i < bh->b_size; i += 512) {
journal_header_t *tmp = (journal_header_t*)bh->b_data;
tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
}
header = (journal_header_t *)(bh->b_data);
header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
header->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
header->h_sequence = cpu_to_be32(commit_transaction->t_tid);
JBUFFER_TRACE(descriptor, "write commit block");
set_buffer_dirty(bh);
......
......@@ -136,18 +136,20 @@ static int journal_submit_commit_record(journal_t *journal,
JBUFFER_TRACE(descriptor, "submit commit block");
lock_buffer(bh);
get_bh(bh);
set_buffer_dirty(bh);
set_buffer_uptodate(bh);
bh->b_end_io = journal_end_buffer_io_sync;
if (journal->j_flags & JBD2_BARRIER &&
!JBD2_HAS_COMPAT_FEATURE(journal,
!JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
set_buffer_ordered(bh);
barrier_done = 1;
}
ret = submit_bh(WRITE, bh);
if (barrier_done)
clear_buffer_ordered(bh);
/* is it possible for another commit to fail at roughly
* the same time as this one? If so, we don't want to
......@@ -166,7 +168,6 @@ static int journal_submit_commit_record(journal_t *journal,
spin_unlock(&journal->j_state_lock);
/* And try again, without the barrier */
clear_buffer_ordered(bh);
set_buffer_uptodate(bh);
set_buffer_dirty(bh);
ret = submit_bh(WRITE, bh);
......@@ -872,7 +873,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (err)
__jbd2_journal_abort_hard(journal);
}
err = journal_wait_on_commit_record(cbh);
if (!err && !is_journal_aborted(journal))
err = journal_wait_on_commit_record(cbh);
if (err)
jbd2_journal_abort(journal, err);
......
......@@ -641,7 +641,7 @@ static int do_one_pass(journal_t *journal,
if (chksum_err) {
info->end_transaction = next_commit_ID;
if (!JBD2_HAS_COMPAT_FEATURE(journal,
if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
printk(KERN_ERR
"JBD: Transaction %u "
......
......@@ -489,6 +489,13 @@ do { \
#define EXT4_ERROR_FS 0x0002 /* Errors detected */
#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */
/*
* Misc. filesystem flags
*/
#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */
#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */
#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
/*
* Mount flags
*/
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment