Commit 90e775b7, authored by Jan Kara, committed by Theodore Ts'o

ext4: fix lost truncate due to race with writeback

The following race can lead to the loss of an i_disksize update from truncate,
resulting in a wrong inode size if the inode size isn't updated
again before the inode is reclaimed:

ext4_setattr()				mpage_map_and_submit_extent()
  EXT4_I(inode)->i_disksize = attr->ia_size;
  ...					  ...
					  disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT
					  /* False because i_size isn't
					   * updated yet */
					  if (disksize > i_size_read(inode))
					  /* True, because i_disksize is
					   * already truncated */
					  if (disksize > EXT4_I(inode)->i_disksize)
					    /* Overwrite i_disksize
					     * update from truncate */
					    ext4_update_i_disksize()
  i_size_write(inode, attr->ia_size);

For other places updating i_disksize such race cannot happen because
i_mutex prevents these races. Writeback is the only place where we do
not hold i_mutex and we cannot grab it there because of lock ordering.

We fix the race by doing both the i_disksize and i_size updates in truncate
atomically under i_data_sem, and in mpage_map_and_submit_extent() we move
the check against i_size under i_data_sem as well.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@vger.kernel.org
parent 5208386c
...@@ -2432,16 +2432,32 @@ do { \ ...@@ -2432,16 +2432,32 @@ do { \
#define EXT4_FREECLUSTERS_WATERMARK 0 #define EXT4_FREECLUSTERS_WATERMARK 0
#endif #endif
/* Update i_disksize. Requires i_mutex to avoid races with truncate */
static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
{ {
/* WARN_ON_ONCE(S_ISREG(inode->i_mode) &&
* XXX: replace with spinlock if seen contended -bzzz !mutex_is_locked(&inode->i_mutex));
*/ down_write(&EXT4_I(inode)->i_data_sem);
if (newsize > EXT4_I(inode)->i_disksize)
EXT4_I(inode)->i_disksize = newsize;
up_write(&EXT4_I(inode)->i_data_sem);
}
/*
* Update i_disksize after writeback has been started. Races with truncate
* are avoided by checking i_size under i_data_sem.
*/
static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
{
loff_t i_size;
down_write(&EXT4_I(inode)->i_data_sem); down_write(&EXT4_I(inode)->i_data_sem);
i_size = i_size_read(inode);
if (newsize > i_size)
newsize = i_size;
if (newsize > EXT4_I(inode)->i_disksize) if (newsize > EXT4_I(inode)->i_disksize)
EXT4_I(inode)->i_disksize = newsize; EXT4_I(inode)->i_disksize = newsize;
up_write(&EXT4_I(inode)->i_data_sem); up_write(&EXT4_I(inode)->i_data_sem);
return ;
} }
struct ext4_group_info { struct ext4_group_info {
......
...@@ -2237,12 +2237,10 @@ static int mpage_map_and_submit_extent(handle_t *handle, ...@@ -2237,12 +2237,10 @@ static int mpage_map_and_submit_extent(handle_t *handle,
/* Update on-disk size after IO is submitted */ /* Update on-disk size after IO is submitted */
disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
if (disksize > i_size_read(inode))
disksize = i_size_read(inode);
if (disksize > EXT4_I(inode)->i_disksize) { if (disksize > EXT4_I(inode)->i_disksize) {
int err2; int err2;
ext4_update_i_disksize(inode, disksize); ext4_wb_update_i_disksize(inode, disksize);
err2 = ext4_mark_inode_dirty(handle, inode); err2 = ext4_mark_inode_dirty(handle, inode);
if (err2) if (err2)
ext4_error(inode->i_sb, ext4_error(inode->i_sb,
...@@ -4627,18 +4625,27 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ...@@ -4627,18 +4625,27 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
error = ext4_orphan_add(handle, inode); error = ext4_orphan_add(handle, inode);
orphan = 1; orphan = 1;
} }
down_write(&EXT4_I(inode)->i_data_sem);
EXT4_I(inode)->i_disksize = attr->ia_size; EXT4_I(inode)->i_disksize = attr->ia_size;
rc = ext4_mark_inode_dirty(handle, inode); rc = ext4_mark_inode_dirty(handle, inode);
if (!error) if (!error)
error = rc; error = rc;
/*
* We have to update i_size under i_data_sem together
* with i_disksize to avoid races with writeback code
* running ext4_wb_update_i_disksize().
*/
if (!error)
i_size_write(inode, attr->ia_size);
up_write(&EXT4_I(inode)->i_data_sem);
ext4_journal_stop(handle); ext4_journal_stop(handle);
if (error) { if (error) {
ext4_orphan_del(NULL, inode); ext4_orphan_del(NULL, inode);
goto err_out; goto err_out;
} }
} } else
i_size_write(inode, attr->ia_size);
i_size_write(inode, attr->ia_size);
/* /*
* Blocks are going to be removed from the inode. Wait * Blocks are going to be removed from the inode. Wait
* for dio in flight. Temporarily disable * for dio in flight. Temporarily disable
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment