Commit d4f03186 authored by Linus Torvalds

Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 bugfixes from Ted Ts'o:
 "Ext4 bug fixes for 3.17, to provide better handling of memory
  allocation failures, and to fix some journaling bugs involving
  journal checksums and FALLOC_FL_ZERO_RANGE"

* tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix same-dir rename when inline data directory overflows
  jbd2: fix descriptor block size handling errors with journal_csum
  jbd2: fix infinite loop when recovering corrupt journal blocks
  ext4: update i_disksize coherently with block allocation on error path
  ext4: fix transaction issues for ext4_fallocate and ext_zero_range
  ext4: fix incorect journal credits reservation in ext4_zero_range
  ext4: move i_size,i_disksize update routines to helper function
  ext4: fix BUG_ON in mb_free_blocks()
  ext4: propagate errors up to ext4_find_entry()'s callers
parents ef13c8af d80d448c
...@@ -1825,7 +1825,7 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) ...@@ -1825,7 +1825,7 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
/* /*
* Special error return code only used by dx_probe() and its callers. * Special error return code only used by dx_probe() and its callers.
*/ */
#define ERR_BAD_DX_DIR -75000 #define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1))
/* /*
* Timeout and state flag for lazy initialization inode thread. * Timeout and state flag for lazy initialization inode thread.
...@@ -2454,6 +2454,22 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) ...@@ -2454,6 +2454,22 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
up_write(&EXT4_I(inode)->i_data_sem); up_write(&EXT4_I(inode)->i_data_sem);
} }
/*
 * Update i_size, i_disksize. Requires i_mutex to avoid races with truncate.
 *
 * Both sizes are only ever grown here: each is raised to @newsize when
 * @newsize is larger than its current value and is left untouched otherwise.
 *
 * Returns a bitmask describing what was changed:
 *   bit 0 (value 1) - i_size was raised to @newsize
 *   bit 1 (value 2) - i_disksize was raised to @newsize
 * A return of 0 means neither size changed.
 */
static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
{
int changed = 0;
/* Grow the in-core size first. */
if (newsize > inode->i_size) {
i_size_write(inode, newsize);
changed = 1;
}
/* i_disksize is compared separately from i_size, so it may need raising even when i_size did not. */
if (newsize > EXT4_I(inode)->i_disksize) {
ext4_update_i_disksize(inode, newsize);
changed |= 2;
}
return changed;
}
struct ext4_group_info { struct ext4_group_info {
unsigned long bb_state; unsigned long bb_state;
struct rb_root bb_free_root; struct rb_root bb_free_root;
......
...@@ -4665,7 +4665,8 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode) ...@@ -4665,7 +4665,8 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode)
} }
static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
ext4_lblk_t len, int flags, int mode) ext4_lblk_t len, loff_t new_size,
int flags, int mode)
{ {
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
handle_t *handle; handle_t *handle;
...@@ -4674,8 +4675,10 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, ...@@ -4674,8 +4675,10 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
int retries = 0; int retries = 0;
struct ext4_map_blocks map; struct ext4_map_blocks map;
unsigned int credits; unsigned int credits;
loff_t epos;
map.m_lblk = offset; map.m_lblk = offset;
map.m_len = len;
/* /*
* Don't normalize the request if it can fit in one extent so * Don't normalize the request if it can fit in one extent so
* that it doesn't get unnecessarily split into multiple * that it doesn't get unnecessarily split into multiple
...@@ -4690,9 +4693,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, ...@@ -4690,9 +4693,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
credits = ext4_chunk_trans_blocks(inode, len); credits = ext4_chunk_trans_blocks(inode, len);
retry: retry:
while (ret >= 0 && ret < len) { while (ret >= 0 && len) {
map.m_lblk = map.m_lblk + ret;
map.m_len = len = len - ret;
handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
credits); credits);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
...@@ -4709,6 +4710,21 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, ...@@ -4709,6 +4710,21 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
ret2 = ext4_journal_stop(handle); ret2 = ext4_journal_stop(handle);
break; break;
} }
map.m_lblk += ret;
map.m_len = len = len - ret;
epos = (loff_t)map.m_lblk << inode->i_blkbits;
inode->i_ctime = ext4_current_time(inode);
if (new_size) {
if (epos > new_size)
epos = new_size;
if (ext4_update_inode_size(inode, epos) & 0x1)
inode->i_mtime = inode->i_ctime;
} else {
if (epos > inode->i_size)
ext4_set_inode_flag(inode,
EXT4_INODE_EOFBLOCKS);
}
ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle); ret2 = ext4_journal_stop(handle);
if (ret2) if (ret2)
break; break;
...@@ -4731,7 +4747,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, ...@@ -4731,7 +4747,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
loff_t new_size = 0; loff_t new_size = 0;
int ret = 0; int ret = 0;
int flags; int flags;
int partial; int credits;
int partial_begin, partial_end;
loff_t start, end; loff_t start, end;
ext4_lblk_t lblk; ext4_lblk_t lblk;
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = inode->i_mapping;
...@@ -4771,7 +4788,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, ...@@ -4771,7 +4788,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (start < offset || end > offset + len) if (start < offset || end > offset + len)
return -EINVAL; return -EINVAL;
partial = (offset + len) & ((1 << blkbits) - 1); partial_begin = offset & ((1 << blkbits) - 1);
partial_end = (offset + len) & ((1 << blkbits) - 1);
lblk = start >> blkbits; lblk = start >> blkbits;
max_blocks = (end >> blkbits); max_blocks = (end >> blkbits);
...@@ -4805,7 +4823,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, ...@@ -4805,7 +4823,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
* If we have a partial block after EOF we have to allocate * If we have a partial block after EOF we have to allocate
* the entire block. * the entire block.
*/ */
if (partial) if (partial_end)
max_blocks += 1; max_blocks += 1;
} }
...@@ -4813,6 +4831,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, ...@@ -4813,6 +4831,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
/* Now release the pages and zero block aligned part of pages*/ /* Now release the pages and zero block aligned part of pages*/
truncate_pagecache_range(inode, start, end - 1); truncate_pagecache_range(inode, start, end - 1);
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
/* Wait all existing dio workers, newcomers will block on i_mutex */ /* Wait all existing dio workers, newcomers will block on i_mutex */
ext4_inode_block_unlocked_dio(inode); ext4_inode_block_unlocked_dio(inode);
...@@ -4825,13 +4844,22 @@ static long ext4_zero_range(struct file *file, loff_t offset, ...@@ -4825,13 +4844,22 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (ret) if (ret)
goto out_dio; goto out_dio;
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
mode); flags, mode);
if (ret) if (ret)
goto out_dio; goto out_dio;
} }
if (!partial_begin && !partial_end)
goto out_dio;
handle = ext4_journal_start(inode, EXT4_HT_MISC, 4); /*
* In worst case we have to writeout two nonadjacent unwritten
* blocks and update the inode
*/
credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
if (ext4_should_journal_data(inode))
credits += 2;
handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
ret = PTR_ERR(handle); ret = PTR_ERR(handle);
ext4_std_error(inode->i_sb, ret); ext4_std_error(inode->i_sb, ret);
...@@ -4839,12 +4867,8 @@ static long ext4_zero_range(struct file *file, loff_t offset, ...@@ -4839,12 +4867,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
} }
inode->i_mtime = inode->i_ctime = ext4_current_time(inode); inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
if (new_size) { if (new_size) {
if (new_size > i_size_read(inode)) ext4_update_inode_size(inode, new_size);
i_size_write(inode, new_size);
if (new_size > EXT4_I(inode)->i_disksize)
ext4_update_i_disksize(inode, new_size);
} else { } else {
/* /*
* Mark that we allocate beyond EOF so the subsequent truncate * Mark that we allocate beyond EOF so the subsequent truncate
...@@ -4853,7 +4877,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, ...@@ -4853,7 +4877,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if ((offset + len) > i_size_read(inode)) if ((offset + len) > i_size_read(inode))
ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
} }
ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode);
/* Zero out partial block at the edges of the range */ /* Zero out partial block at the edges of the range */
...@@ -4880,13 +4903,11 @@ static long ext4_zero_range(struct file *file, loff_t offset, ...@@ -4880,13 +4903,11 @@ static long ext4_zero_range(struct file *file, loff_t offset,
long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
{ {
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
handle_t *handle;
loff_t new_size = 0; loff_t new_size = 0;
unsigned int max_blocks; unsigned int max_blocks;
int ret = 0; int ret = 0;
int flags; int flags;
ext4_lblk_t lblk; ext4_lblk_t lblk;
struct timespec tv;
unsigned int blkbits = inode->i_blkbits; unsigned int blkbits = inode->i_blkbits;
/* Return error if mode is not supported */ /* Return error if mode is not supported */
...@@ -4937,36 +4958,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) ...@@ -4937,36 +4958,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
goto out; goto out;
} }
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode); ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
flags, mode);
if (ret) if (ret)
goto out; goto out;
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
if (IS_ERR(handle)) ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
goto out; EXT4_I(inode)->i_sync_tid);
tv = inode->i_ctime = ext4_current_time(inode);
if (new_size) {
if (new_size > i_size_read(inode)) {
i_size_write(inode, new_size);
inode->i_mtime = tv;
}
if (new_size > EXT4_I(inode)->i_disksize)
ext4_update_i_disksize(inode, new_size);
} else {
/*
* Mark that we allocate beyond EOF so the subsequent truncate
* can proceed even if the new size is the same as i_size.
*/
if ((offset + len) > i_size_read(inode))
ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
} }
ext4_mark_inode_dirty(handle, inode);
if (file->f_flags & O_SYNC)
ext4_handle_sync(handle);
ext4_journal_stop(handle);
out: out:
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
......
...@@ -1055,27 +1055,11 @@ static int ext4_write_end(struct file *file, ...@@ -1055,27 +1055,11 @@ static int ext4_write_end(struct file *file,
} else } else
copied = block_write_end(file, mapping, pos, copied = block_write_end(file, mapping, pos,
len, copied, page, fsdata); len, copied, page, fsdata);
/* /*
* No need to use i_size_read() here, the i_size * it's important to update i_size while still holding page lock:
* cannot change under us because we hole i_mutex.
*
* But it's important to update i_size while still holding page lock:
* page writeout could otherwise come in and zero beyond i_size. * page writeout could otherwise come in and zero beyond i_size.
*/ */
if (pos + copied > inode->i_size) { i_size_changed = ext4_update_inode_size(inode, pos + copied);
i_size_write(inode, pos + copied);
i_size_changed = 1;
}
if (pos + copied > EXT4_I(inode)->i_disksize) {
/* We need to mark inode dirty even if
* new_i_size is less that inode->i_size
* but greater than i_disksize. (hint delalloc)
*/
ext4_update_i_disksize(inode, (pos + copied));
i_size_changed = 1;
}
unlock_page(page); unlock_page(page);
page_cache_release(page); page_cache_release(page);
...@@ -1123,7 +1107,7 @@ static int ext4_journalled_write_end(struct file *file, ...@@ -1123,7 +1107,7 @@ static int ext4_journalled_write_end(struct file *file,
int ret = 0, ret2; int ret = 0, ret2;
int partial = 0; int partial = 0;
unsigned from, to; unsigned from, to;
loff_t new_i_size; int size_changed = 0;
trace_ext4_journalled_write_end(inode, pos, len, copied); trace_ext4_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_CACHE_SIZE - 1); from = pos & (PAGE_CACHE_SIZE - 1);
...@@ -1146,20 +1130,18 @@ static int ext4_journalled_write_end(struct file *file, ...@@ -1146,20 +1130,18 @@ static int ext4_journalled_write_end(struct file *file,
if (!partial) if (!partial)
SetPageUptodate(page); SetPageUptodate(page);
} }
new_i_size = pos + copied; size_changed = ext4_update_inode_size(inode, pos + copied);
if (new_i_size > inode->i_size)
i_size_write(inode, pos+copied);
ext4_set_inode_state(inode, EXT4_STATE_JDATA); ext4_set_inode_state(inode, EXT4_STATE_JDATA);
EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
if (new_i_size > EXT4_I(inode)->i_disksize) { unlock_page(page);
ext4_update_i_disksize(inode, new_i_size); page_cache_release(page);
if (size_changed) {
ret2 = ext4_mark_inode_dirty(handle, inode); ret2 = ext4_mark_inode_dirty(handle, inode);
if (!ret) if (!ret)
ret = ret2; ret = ret2;
} }
unlock_page(page);
page_cache_release(page);
if (pos + len > inode->i_size && ext4_can_truncate(inode)) if (pos + len > inode->i_size && ext4_can_truncate(inode))
/* if we have allocated more blocks and copied /* if we have allocated more blocks and copied
* less. We will have blocks allocated outside * less. We will have blocks allocated outside
...@@ -2095,6 +2077,7 @@ static int mpage_map_and_submit_extent(handle_t *handle, ...@@ -2095,6 +2077,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
struct ext4_map_blocks *map = &mpd->map; struct ext4_map_blocks *map = &mpd->map;
int err; int err;
loff_t disksize; loff_t disksize;
int progress = 0;
mpd->io_submit.io_end->offset = mpd->io_submit.io_end->offset =
((loff_t)map->m_lblk) << inode->i_blkbits; ((loff_t)map->m_lblk) << inode->i_blkbits;
...@@ -2111,8 +2094,11 @@ static int mpage_map_and_submit_extent(handle_t *handle, ...@@ -2111,8 +2094,11 @@ static int mpage_map_and_submit_extent(handle_t *handle,
* is non-zero, a commit should free up blocks. * is non-zero, a commit should free up blocks.
*/ */
if ((err == -ENOMEM) || if ((err == -ENOMEM) ||
(err == -ENOSPC && ext4_count_free_clusters(sb))) (err == -ENOSPC && ext4_count_free_clusters(sb))) {
if (progress)
goto update_disksize;
return err; return err;
}
ext4_msg(sb, KERN_CRIT, ext4_msg(sb, KERN_CRIT,
"Delayed block allocation failed for " "Delayed block allocation failed for "
"inode %lu at logical offset %llu with" "inode %lu at logical offset %llu with"
...@@ -2129,15 +2115,17 @@ static int mpage_map_and_submit_extent(handle_t *handle, ...@@ -2129,15 +2115,17 @@ static int mpage_map_and_submit_extent(handle_t *handle,
*give_up_on_write = true; *give_up_on_write = true;
return err; return err;
} }
progress = 1;
/* /*
* Update buffer state, submit mapped pages, and get us new * Update buffer state, submit mapped pages, and get us new
* extent to map * extent to map
*/ */
err = mpage_map_and_submit_buffers(mpd); err = mpage_map_and_submit_buffers(mpd);
if (err < 0) if (err < 0)
return err; goto update_disksize;
} while (map->m_len); } while (map->m_len);
update_disksize:
/* /*
* Update on-disk size after IO is submitted. Races with * Update on-disk size after IO is submitted. Races with
* truncate are avoided by checking i_size under i_data_sem. * truncate are avoided by checking i_size under i_data_sem.
......
...@@ -1412,6 +1412,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, ...@@ -1412,6 +1412,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
int last = first + count - 1; int last = first + count - 1;
struct super_block *sb = e4b->bd_sb; struct super_block *sb = e4b->bd_sb;
if (WARN_ON(count == 0))
return;
BUG_ON(last >= (sb->s_blocksize << 3)); BUG_ON(last >= (sb->s_blocksize << 3));
assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
/* Don't bother if the block group is corrupt. */ /* Don't bother if the block group is corrupt. */
...@@ -3221,6 +3223,8 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) ...@@ -3221,6 +3223,8 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
int err; int err;
if (pa == NULL) { if (pa == NULL) {
if (ac->ac_f_ex.fe_len == 0)
return;
err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b); err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
if (err) { if (err) {
/* /*
...@@ -3235,6 +3239,7 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) ...@@ -3235,6 +3239,7 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start, mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
ac->ac_f_ex.fe_len); ac->ac_f_ex.fe_len);
ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group); ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
ext4_mb_unload_buddy(&e4b);
return; return;
} }
if (pa->pa_type == MB_INODE_PA) if (pa->pa_type == MB_INODE_PA)
......
...@@ -1227,7 +1227,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, ...@@ -1227,7 +1227,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
buffer */ buffer */
int num = 0; int num = 0;
ext4_lblk_t nblocks; ext4_lblk_t nblocks;
int i, err; int i, err = 0;
int namelen; int namelen;
*res_dir = NULL; *res_dir = NULL;
...@@ -1264,7 +1264,11 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, ...@@ -1264,7 +1264,11 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
* return. Otherwise, fall back to doing a search the * return. Otherwise, fall back to doing a search the
* old fashioned way. * old fashioned way.
*/ */
if (bh || (err != ERR_BAD_DX_DIR)) if (err == -ENOENT)
return NULL;
if (err && err != ERR_BAD_DX_DIR)
return ERR_PTR(err);
if (bh)
return bh; return bh;
dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
"falling back\n")); "falling back\n"));
...@@ -1295,6 +1299,11 @@ static struct buffer_head * ext4_find_entry (struct inode *dir, ...@@ -1295,6 +1299,11 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
} }
num++; num++;
bh = ext4_getblk(NULL, dir, b++, 0, &err); bh = ext4_getblk(NULL, dir, b++, 0, &err);
if (unlikely(err)) {
if (ra_max == 0)
return ERR_PTR(err);
break;
}
bh_use[ra_max] = bh; bh_use[ra_max] = bh;
if (bh) if (bh)
ll_rw_block(READ | REQ_META | REQ_PRIO, ll_rw_block(READ | REQ_META | REQ_PRIO,
...@@ -1417,6 +1426,8 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi ...@@ -1417,6 +1426,8 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
return ERR_PTR(-ENAMETOOLONG); return ERR_PTR(-ENAMETOOLONG);
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
if (IS_ERR(bh))
return (struct dentry *) bh;
inode = NULL; inode = NULL;
if (bh) { if (bh) {
__u32 ino = le32_to_cpu(de->inode); __u32 ino = le32_to_cpu(de->inode);
...@@ -1450,6 +1461,8 @@ struct dentry *ext4_get_parent(struct dentry *child) ...@@ -1450,6 +1461,8 @@ struct dentry *ext4_get_parent(struct dentry *child)
struct buffer_head *bh; struct buffer_head *bh;
bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL); bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL);
if (IS_ERR(bh))
return (struct dentry *) bh;
if (!bh) if (!bh)
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
ino = le32_to_cpu(de->inode); ino = le32_to_cpu(de->inode);
...@@ -2727,6 +2740,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) ...@@ -2727,6 +2740,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
retval = -ENOENT; retval = -ENOENT;
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
if (IS_ERR(bh))
return PTR_ERR(bh);
if (!bh) if (!bh)
goto end_rmdir; goto end_rmdir;
...@@ -2794,6 +2809,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) ...@@ -2794,6 +2809,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
retval = -ENOENT; retval = -ENOENT;
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
if (IS_ERR(bh))
return PTR_ERR(bh);
if (!bh) if (!bh)
goto end_unlink; goto end_unlink;
...@@ -3121,6 +3138,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir, ...@@ -3121,6 +3138,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
struct ext4_dir_entry_2 *de; struct ext4_dir_entry_2 *de;
bh = ext4_find_entry(dir, d_name, &de, NULL); bh = ext4_find_entry(dir, d_name, &de, NULL);
if (IS_ERR(bh))
return PTR_ERR(bh);
if (bh) { if (bh) {
retval = ext4_delete_entry(handle, dir, de, bh); retval = ext4_delete_entry(handle, dir, de, bh);
brelse(bh); brelse(bh);
...@@ -3128,7 +3147,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir, ...@@ -3128,7 +3147,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
return retval; return retval;
} }
static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent) static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
int force_reread)
{ {
int retval; int retval;
/* /*
...@@ -3140,7 +3160,8 @@ static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent) ...@@ -3140,7 +3160,8 @@ static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent)
if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino || if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino ||
ent->de->name_len != ent->dentry->d_name.len || ent->de->name_len != ent->dentry->d_name.len ||
strncmp(ent->de->name, ent->dentry->d_name.name, strncmp(ent->de->name, ent->dentry->d_name.name,
ent->de->name_len)) { ent->de->name_len) ||
force_reread) {
retval = ext4_find_delete_entry(handle, ent->dir, retval = ext4_find_delete_entry(handle, ent->dir,
&ent->dentry->d_name); &ent->dentry->d_name);
} else { } else {
...@@ -3191,6 +3212,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -3191,6 +3212,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
.dentry = new_dentry, .dentry = new_dentry,
.inode = new_dentry->d_inode, .inode = new_dentry->d_inode,
}; };
int force_reread;
int retval; int retval;
dquot_initialize(old.dir); dquot_initialize(old.dir);
...@@ -3202,6 +3224,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -3202,6 +3224,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
dquot_initialize(new.inode); dquot_initialize(new.inode);
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
if (IS_ERR(old.bh))
return PTR_ERR(old.bh);
/* /*
* Check for inode number is _not_ due to possible IO errors. * Check for inode number is _not_ due to possible IO errors.
* We might rmdir the source, keep it as pwd of some process * We might rmdir the source, keep it as pwd of some process
...@@ -3214,6 +3238,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -3214,6 +3238,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
&new.de, &new.inlined); &new.de, &new.inlined);
if (IS_ERR(new.bh)) {
retval = PTR_ERR(new.bh);
goto end_rename;
}
if (new.bh) { if (new.bh) {
if (!new.inode) { if (!new.inode) {
brelse(new.bh); brelse(new.bh);
...@@ -3246,6 +3274,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -3246,6 +3274,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
if (retval) if (retval)
goto end_rename; goto end_rename;
} }
/*
* If we're renaming a file within an inline_data dir and adding or
* setting the new dirent causes a conversion from inline_data to
* extents/blockmap, we need to force the dirent delete code to
* re-read the directory, or else we end up trying to delete a dirent
* from what is now the extent tree root (or a block map).
*/
force_reread = (new.dir->i_ino == old.dir->i_ino &&
ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
if (!new.bh) { if (!new.bh) {
retval = ext4_add_entry(handle, new.dentry, old.inode); retval = ext4_add_entry(handle, new.dentry, old.inode);
if (retval) if (retval)
...@@ -3256,6 +3293,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -3256,6 +3293,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
if (retval) if (retval)
goto end_rename; goto end_rename;
} }
if (force_reread)
force_reread = !ext4_test_inode_flag(new.dir,
EXT4_INODE_INLINE_DATA);
/* /*
* Like most other Unix systems, set the ctime for inodes on a * Like most other Unix systems, set the ctime for inodes on a
...@@ -3267,7 +3307,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -3267,7 +3307,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
/* /*
* ok, that's it * ok, that's it
*/ */
ext4_rename_delete(handle, &old); ext4_rename_delete(handle, &old, force_reread);
if (new.inode) { if (new.inode) {
ext4_dec_count(handle, new.inode); ext4_dec_count(handle, new.inode);
...@@ -3330,6 +3370,8 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -3330,6 +3370,8 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
&old.de, &old.inlined); &old.de, &old.inlined);
if (IS_ERR(old.bh))
return PTR_ERR(old.bh);
/* /*
* Check for inode number is _not_ due to possible IO errors. * Check for inode number is _not_ due to possible IO errors.
* We might rmdir the source, keep it as pwd of some process * We might rmdir the source, keep it as pwd of some process
...@@ -3342,6 +3384,10 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -3342,6 +3384,10 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
&new.de, &new.inlined); &new.de, &new.inlined);
if (IS_ERR(new.bh)) {
retval = PTR_ERR(new.bh);
goto end_rename;
}
/* RENAME_EXCHANGE case: old *and* new must both exist */ /* RENAME_EXCHANGE case: old *and* new must both exist */
if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino) if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino)
......
...@@ -3181,9 +3181,9 @@ static int set_journal_csum_feature_set(struct super_block *sb) ...@@ -3181,9 +3181,9 @@ static int set_journal_csum_feature_set(struct super_block *sb)
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
/* journal checksum v2 */ /* journal checksum v3 */
compat = 0; compat = 0;
incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2; incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
} else { } else {
/* journal checksum v1 */ /* journal checksum v1 */
compat = JBD2_FEATURE_COMPAT_CHECKSUM; compat = JBD2_FEATURE_COMPAT_CHECKSUM;
...@@ -3205,6 +3205,7 @@ static int set_journal_csum_feature_set(struct super_block *sb) ...@@ -3205,6 +3205,7 @@ static int set_journal_csum_feature_set(struct super_block *sb)
jbd2_journal_clear_features(sbi->s_journal, jbd2_journal_clear_features(sbi->s_journal,
JBD2_FEATURE_COMPAT_CHECKSUM, 0, JBD2_FEATURE_COMPAT_CHECKSUM, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
JBD2_FEATURE_INCOMPAT_CSUM_V3 |
JBD2_FEATURE_INCOMPAT_CSUM_V2); JBD2_FEATURE_INCOMPAT_CSUM_V2);
} }
......
...@@ -97,7 +97,7 @@ static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh) ...@@ -97,7 +97,7 @@ static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
struct commit_header *h; struct commit_header *h;
__u32 csum; __u32 csum;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (!jbd2_journal_has_csum_v2or3(j))
return; return;
h = (struct commit_header *)(bh->b_data); h = (struct commit_header *)(bh->b_data);
...@@ -313,11 +313,11 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) ...@@ -313,11 +313,11 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
return checksum; return checksum;
} }
static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, static void write_tag_block(journal_t *j, journal_block_tag_t *tag,
unsigned long long block) unsigned long long block)
{ {
tag->t_blocknr = cpu_to_be32(block & (u32)~0); tag->t_blocknr = cpu_to_be32(block & (u32)~0);
if (tag_bytes > JBD2_TAG_SIZE32) if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_64BIT))
tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
} }
...@@ -327,7 +327,7 @@ static void jbd2_descr_block_csum_set(journal_t *j, ...@@ -327,7 +327,7 @@ static void jbd2_descr_block_csum_set(journal_t *j,
struct jbd2_journal_block_tail *tail; struct jbd2_journal_block_tail *tail;
__u32 csum; __u32 csum;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (!jbd2_journal_has_csum_v2or3(j))
return; return;
tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
...@@ -340,12 +340,13 @@ static void jbd2_descr_block_csum_set(journal_t *j, ...@@ -340,12 +340,13 @@ static void jbd2_descr_block_csum_set(journal_t *j,
static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
struct buffer_head *bh, __u32 sequence) struct buffer_head *bh, __u32 sequence)
{ {
journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
struct page *page = bh->b_page; struct page *page = bh->b_page;
__u8 *addr; __u8 *addr;
__u32 csum32; __u32 csum32;
__be32 seq; __be32 seq;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (!jbd2_journal_has_csum_v2or3(j))
return; return;
seq = cpu_to_be32(sequence); seq = cpu_to_be32(sequence);
...@@ -355,8 +356,10 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, ...@@ -355,8 +356,10 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
bh->b_size); bh->b_size);
kunmap_atomic(addr); kunmap_atomic(addr);
/* We only have space to store the lower 16 bits of the crc32c. */ if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3))
tag->t_checksum = cpu_to_be16(csum32); tag3->t_checksum = cpu_to_be32(csum32);
else
tag->t_checksum = cpu_to_be16(csum32);
} }
/* /*
* jbd2_journal_commit_transaction * jbd2_journal_commit_transaction
...@@ -396,7 +399,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -396,7 +399,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
LIST_HEAD(io_bufs); LIST_HEAD(io_bufs);
LIST_HEAD(log_bufs); LIST_HEAD(log_bufs);
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (jbd2_journal_has_csum_v2or3(journal))
csum_size = sizeof(struct jbd2_journal_block_tail); csum_size = sizeof(struct jbd2_journal_block_tail);
/* /*
...@@ -690,7 +693,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -690,7 +693,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
tag_flag |= JBD2_FLAG_SAME_UUID; tag_flag |= JBD2_FLAG_SAME_UUID;
tag = (journal_block_tag_t *) tagp; tag = (journal_block_tag_t *) tagp;
write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); write_tag_block(journal, tag, jh2bh(jh)->b_blocknr);
tag->t_flags = cpu_to_be16(tag_flag); tag->t_flags = cpu_to_be16(tag_flag);
jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
commit_transaction->t_tid); commit_transaction->t_tid);
......
...@@ -124,7 +124,7 @@ EXPORT_SYMBOL(__jbd2_debug); ...@@ -124,7 +124,7 @@ EXPORT_SYMBOL(__jbd2_debug);
/* Checksumming functions */ /* Checksumming functions */
static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
{ {
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (!jbd2_journal_has_csum_v2or3(j))
return 1; return 1;
return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
...@@ -145,7 +145,7 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) ...@@ -145,7 +145,7 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
{ {
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (!jbd2_journal_has_csum_v2or3(j))
return 1; return 1;
return sb->s_checksum == jbd2_superblock_csum(j, sb); return sb->s_checksum == jbd2_superblock_csum(j, sb);
...@@ -153,7 +153,7 @@ static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) ...@@ -153,7 +153,7 @@ static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
{ {
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (!jbd2_journal_has_csum_v2or3(j))
return; return;
sb->s_checksum = jbd2_superblock_csum(j, sb); sb->s_checksum = jbd2_superblock_csum(j, sb);
...@@ -1522,21 +1522,29 @@ static int journal_get_superblock(journal_t *journal) ...@@ -1522,21 +1522,29 @@ static int journal_get_superblock(journal_t *journal)
goto out; goto out;
} }
if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && if (jbd2_journal_has_csum_v2or3(journal) &&
JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) {
/* Can't have checksum v1 and v2 on at the same time! */ /* Can't have checksum v1 and v2 on at the same time! */
printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 "
"at the same time!\n"); "at the same time!\n");
goto out; goto out;
} }
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) &&
JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
/* Can't have checksum v2 and v3 at the same time! */
printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
"at the same time!\n");
goto out;
}
if (!jbd2_verify_csum_type(journal, sb)) { if (!jbd2_verify_csum_type(journal, sb)) {
printk(KERN_ERR "JBD2: Unknown checksum type\n"); printk(KERN_ERR "JBD2: Unknown checksum type\n");
goto out; goto out;
} }
/* Load the checksum driver */ /* Load the checksum driver */
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { if (jbd2_journal_has_csum_v2or3(journal)) {
journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
if (IS_ERR(journal->j_chksum_driver)) { if (IS_ERR(journal->j_chksum_driver)) {
printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
...@@ -1553,7 +1561,7 @@ static int journal_get_superblock(journal_t *journal) ...@@ -1553,7 +1561,7 @@ static int journal_get_superblock(journal_t *journal)
} }
/* Precompute checksum seed for all metadata */ /* Precompute checksum seed for all metadata */
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (jbd2_journal_has_csum_v2or3(journal))
journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
sizeof(sb->s_uuid)); sizeof(sb->s_uuid));
...@@ -1813,8 +1821,14 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, ...@@ -1813,8 +1821,14 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) if (!jbd2_journal_check_available_features(journal, compat, ro, incompat))
return 0; return 0;
/* Asking for checksumming v2 and v1? Only give them v2. */ /* If enabling v2 checksums, turn on v3 instead */
if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 && if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) {
incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2;
incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3;
}
/* Asking for checksumming v3 and v1? Only give them v3. */
if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 &&
compat & JBD2_FEATURE_COMPAT_CHECKSUM) compat & JBD2_FEATURE_COMPAT_CHECKSUM)
compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM;
...@@ -1823,8 +1837,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, ...@@ -1823,8 +1837,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
sb = journal->j_superblock; sb = journal->j_superblock;
/* If enabling v2 checksums, update superblock */ /* If enabling v3 checksums, update superblock */
if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) { if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
sb->s_checksum_type = JBD2_CRC32C_CHKSUM; sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
sb->s_feature_compat &= sb->s_feature_compat &=
~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
...@@ -1842,8 +1856,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, ...@@ -1842,8 +1856,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
} }
/* Precompute checksum seed for all metadata */ /* Precompute checksum seed for all metadata */
if (JBD2_HAS_INCOMPAT_FEATURE(journal, if (jbd2_journal_has_csum_v2or3(journal))
JBD2_FEATURE_INCOMPAT_CSUM_V2))
journal->j_csum_seed = jbd2_chksum(journal, ~0, journal->j_csum_seed = jbd2_chksum(journal, ~0,
sb->s_uuid, sb->s_uuid,
sizeof(sb->s_uuid)); sizeof(sb->s_uuid));
...@@ -1852,7 +1865,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, ...@@ -1852,7 +1865,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
/* If enabling v1 checksums, downgrade superblock */ /* If enabling v1 checksums, downgrade superblock */
if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM))
sb->s_feature_incompat &= sb->s_feature_incompat &=
~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2); ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 |
JBD2_FEATURE_INCOMPAT_CSUM_V3);
sb->s_feature_compat |= cpu_to_be32(compat); sb->s_feature_compat |= cpu_to_be32(compat);
sb->s_feature_ro_compat |= cpu_to_be32(ro); sb->s_feature_ro_compat |= cpu_to_be32(ro);
...@@ -2165,16 +2179,20 @@ int jbd2_journal_blocks_per_page(struct inode *inode) ...@@ -2165,16 +2179,20 @@ int jbd2_journal_blocks_per_page(struct inode *inode)
*/ */
size_t journal_tag_bytes(journal_t *journal) size_t journal_tag_bytes(journal_t *journal)
{ {
journal_block_tag_t tag; size_t sz;
size_t x = 0;
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3))
return sizeof(journal_block_tag3_t);
sz = sizeof(journal_block_tag_t);
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
x += sizeof(tag.t_checksum); sz += sizeof(__u16);
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
return x + JBD2_TAG_SIZE64; return sz;
else else
return x + JBD2_TAG_SIZE32; return sz - sizeof(__u32);
} }
/* /*
......
...@@ -181,7 +181,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j, ...@@ -181,7 +181,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j,
__be32 provided; __be32 provided;
__u32 calculated; __u32 calculated;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (!jbd2_journal_has_csum_v2or3(j))
return 1; return 1;
tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize -
...@@ -205,7 +205,7 @@ static int count_tags(journal_t *journal, struct buffer_head *bh) ...@@ -205,7 +205,7 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
int nr = 0, size = journal->j_blocksize; int nr = 0, size = journal->j_blocksize;
int tag_bytes = journal_tag_bytes(journal); int tag_bytes = journal_tag_bytes(journal);
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (jbd2_journal_has_csum_v2or3(journal))
size -= sizeof(struct jbd2_journal_block_tail); size -= sizeof(struct jbd2_journal_block_tail);
tagp = &bh->b_data[sizeof(journal_header_t)]; tagp = &bh->b_data[sizeof(journal_header_t)];
...@@ -338,10 +338,11 @@ int jbd2_journal_skip_recovery(journal_t *journal) ...@@ -338,10 +338,11 @@ int jbd2_journal_skip_recovery(journal_t *journal)
return err; return err;
} }
static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) static inline unsigned long long read_tag_block(journal_t *journal,
journal_block_tag_t *tag)
{ {
unsigned long long block = be32_to_cpu(tag->t_blocknr); unsigned long long block = be32_to_cpu(tag->t_blocknr);
if (tag_bytes > JBD2_TAG_SIZE32) if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
return block; return block;
} }
...@@ -384,7 +385,7 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) ...@@ -384,7 +385,7 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
__be32 provided; __be32 provided;
__u32 calculated; __u32 calculated;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (!jbd2_journal_has_csum_v2or3(j))
return 1; return 1;
h = buf; h = buf;
...@@ -399,17 +400,21 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) ...@@ -399,17 +400,21 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
void *buf, __u32 sequence) void *buf, __u32 sequence)
{ {
journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
__u32 csum32; __u32 csum32;
__be32 seq; __be32 seq;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (!jbd2_journal_has_csum_v2or3(j))
return 1; return 1;
seq = cpu_to_be32(sequence); seq = cpu_to_be32(sequence);
csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
return tag->t_checksum == cpu_to_be16(csum32); if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3))
return tag3->t_checksum == cpu_to_be32(csum32);
else
return tag->t_checksum == cpu_to_be16(csum32);
} }
static int do_one_pass(journal_t *journal, static int do_one_pass(journal_t *journal,
...@@ -426,6 +431,7 @@ static int do_one_pass(journal_t *journal, ...@@ -426,6 +431,7 @@ static int do_one_pass(journal_t *journal,
int tag_bytes = journal_tag_bytes(journal); int tag_bytes = journal_tag_bytes(journal);
__u32 crc32_sum = ~0; /* Transactional Checksums */ __u32 crc32_sum = ~0; /* Transactional Checksums */
int descr_csum_size = 0; int descr_csum_size = 0;
int block_error = 0;
/* /*
* First thing is to establish what we expect to find in the log * First thing is to establish what we expect to find in the log
...@@ -512,8 +518,7 @@ static int do_one_pass(journal_t *journal, ...@@ -512,8 +518,7 @@ static int do_one_pass(journal_t *journal,
switch(blocktype) { switch(blocktype) {
case JBD2_DESCRIPTOR_BLOCK: case JBD2_DESCRIPTOR_BLOCK:
/* Verify checksum first */ /* Verify checksum first */
if (JBD2_HAS_INCOMPAT_FEATURE(journal, if (jbd2_journal_has_csum_v2or3(journal))
JBD2_FEATURE_INCOMPAT_CSUM_V2))
descr_csum_size = descr_csum_size =
sizeof(struct jbd2_journal_block_tail); sizeof(struct jbd2_journal_block_tail);
if (descr_csum_size > 0 && if (descr_csum_size > 0 &&
...@@ -574,7 +579,7 @@ static int do_one_pass(journal_t *journal, ...@@ -574,7 +579,7 @@ static int do_one_pass(journal_t *journal,
unsigned long long blocknr; unsigned long long blocknr;
J_ASSERT(obh != NULL); J_ASSERT(obh != NULL);
blocknr = read_tag_block(tag_bytes, blocknr = read_tag_block(journal,
tag); tag);
/* If the block has been /* If the block has been
...@@ -598,7 +603,8 @@ static int do_one_pass(journal_t *journal, ...@@ -598,7 +603,8 @@ static int do_one_pass(journal_t *journal,
"checksum recovering " "checksum recovering "
"block %llu in log\n", "block %llu in log\n",
blocknr); blocknr);
continue; block_error = 1;
goto skip_write;
} }
/* Find a buffer for the new /* Find a buffer for the new
...@@ -797,7 +803,8 @@ static int do_one_pass(journal_t *journal, ...@@ -797,7 +803,8 @@ static int do_one_pass(journal_t *journal,
success = -EIO; success = -EIO;
} }
} }
if (block_error && success == 0)
success = -EIO;
return success; return success;
failed: failed:
...@@ -811,7 +818,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j, ...@@ -811,7 +818,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j,
__be32 provided; __be32 provided;
__u32 calculated; __u32 calculated;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (!jbd2_journal_has_csum_v2or3(j))
return 1; return 1;
tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
......
...@@ -91,8 +91,8 @@ ...@@ -91,8 +91,8 @@
#include <linux/list.h> #include <linux/list.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/bio.h> #include <linux/bio.h>
#endif
#include <linux/log2.h> #include <linux/log2.h>
#endif
static struct kmem_cache *jbd2_revoke_record_cache; static struct kmem_cache *jbd2_revoke_record_cache;
static struct kmem_cache *jbd2_revoke_table_cache; static struct kmem_cache *jbd2_revoke_table_cache;
...@@ -597,7 +597,7 @@ static void write_one_revoke_record(journal_t *journal, ...@@ -597,7 +597,7 @@ static void write_one_revoke_record(journal_t *journal,
offset = *offsetp; offset = *offsetp;
/* Do we need to leave space at the end for a checksum? */ /* Do we need to leave space at the end for a checksum? */
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (jbd2_journal_has_csum_v2or3(journal))
csum_size = sizeof(struct jbd2_journal_revoke_tail); csum_size = sizeof(struct jbd2_journal_revoke_tail);
/* Make sure we have a descriptor with space left for the record */ /* Make sure we have a descriptor with space left for the record */
...@@ -644,7 +644,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) ...@@ -644,7 +644,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh)
struct jbd2_journal_revoke_tail *tail; struct jbd2_journal_revoke_tail *tail;
__u32 csum; __u32 csum;
if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) if (!jbd2_journal_has_csum_v2or3(j))
return; return;
tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize -
......
...@@ -159,7 +159,11 @@ typedef struct journal_header_s ...@@ -159,7 +159,11 @@ typedef struct journal_header_s
* journal_block_tag (in the descriptor). The other h_chksum* fields are * journal_block_tag (in the descriptor). The other h_chksum* fields are
* not used. * not used.
* *
* Checksum v1 and v2 are mutually exclusive features. * If FEATURE_INCOMPAT_CSUM_V3 is set, the descriptor block uses
* journal_block_tag3_t to store a full 32-bit checksum. Everything else
* is the same as v2.
*
* Checksum v1, v2, and v3 are mutually exclusive features.
*/ */
struct commit_header { struct commit_header {
__be32 h_magic; __be32 h_magic;
...@@ -179,6 +183,14 @@ struct commit_header { ...@@ -179,6 +183,14 @@ struct commit_header {
* raw struct shouldn't be used for pointer math or sizeof() - use * raw struct shouldn't be used for pointer math or sizeof() - use
* journal_tag_bytes(journal) instead to compute this. * journal_tag_bytes(journal) instead to compute this.
*/ */
/*
 * Block tag layout used in descriptor blocks when the journal has
 * JBD2_FEATURE_INCOMPAT_CSUM_V3 set.  Compared with journal_block_tag_t,
 * this layout has room for a full 32-bit per-block checksum.  Every field
 * is a big-endian 32-bit value.
 */
typedef struct journal_block_tag3_s
{
__be32 t_blocknr; /* The on-disk block number */
__be32 t_flags; /* See below */
__be32 t_blocknr_high; /* most-significant high 32bits. */
__be32 t_checksum; /* crc32c(uuid+seq+block), all 32 bits stored */
} journal_block_tag3_t;
typedef struct journal_block_tag_s typedef struct journal_block_tag_s
{ {
__be32 t_blocknr; /* The on-disk block number */ __be32 t_blocknr; /* The on-disk block number */
...@@ -187,9 +199,6 @@ typedef struct journal_block_tag_s ...@@ -187,9 +199,6 @@ typedef struct journal_block_tag_s
__be32 t_blocknr_high; /* most-significant high 32bits. */ __be32 t_blocknr_high; /* most-significant high 32bits. */
} journal_block_tag_t; } journal_block_tag_t;
#define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high))
#define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t))
/* Tail of descriptor block, for checksumming */ /* Tail of descriptor block, for checksumming */
struct jbd2_journal_block_tail { struct jbd2_journal_block_tail {
__be32 t_checksum; /* crc32c(uuid+descr_block) */ __be32 t_checksum; /* crc32c(uuid+descr_block) */
...@@ -284,6 +293,7 @@ typedef struct journal_superblock_s ...@@ -284,6 +293,7 @@ typedef struct journal_superblock_s
#define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002
#define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004
#define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 #define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008
#define JBD2_FEATURE_INCOMPAT_CSUM_V3 0x00000010
/* Features known to this kernel version: */ /* Features known to this kernel version: */
#define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM
...@@ -291,7 +301,8 @@ typedef struct journal_superblock_s ...@@ -291,7 +301,8 @@ typedef struct journal_superblock_s
#define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \
JBD2_FEATURE_INCOMPAT_64BIT | \ JBD2_FEATURE_INCOMPAT_64BIT | \
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \
JBD2_FEATURE_INCOMPAT_CSUM_V2) JBD2_FEATURE_INCOMPAT_CSUM_V2 | \
JBD2_FEATURE_INCOMPAT_CSUM_V3)
#ifdef __KERNEL__ #ifdef __KERNEL__
...@@ -1296,6 +1307,15 @@ static inline int tid_geq(tid_t x, tid_t y) ...@@ -1296,6 +1307,15 @@ static inline int tid_geq(tid_t x, tid_t y)
extern int jbd2_journal_blocks_per_page(struct inode *inode); extern int jbd2_journal_blocks_per_page(struct inode *inode);
extern size_t journal_tag_bytes(journal_t *journal); extern size_t journal_tag_bytes(journal_t *journal);
/*
 * Tell whether the journal uses metadata checksumming in either its v2
 * or its v3 flavour.
 *
 * Returns 1 when JBD2_FEATURE_INCOMPAT_CSUM_V2 or
 * JBD2_FEATURE_INCOMPAT_CSUM_V3 is set on the journal, 0 otherwise.
 */
static inline int jbd2_journal_has_csum_v2or3(journal_t *journal)
{
	/* Logical OR already normalises the result to 0 or 1. */
	return JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) ||
	       JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3);
}
/* /*
* We reserve t_outstanding_credits >> JBD2_CONTROL_BLOCKS_SHIFT for * We reserve t_outstanding_credits >> JBD2_CONTROL_BLOCKS_SHIFT for
* transaction control blocks. * transaction control blocks.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment