Commit 30276fd6, authored by Andrew Morton, committed by Greg Kroah-Hartman

[PATCH] ext3: fix page lock vs journal_start ranking bug

ext3_block_truncate_page() is calling grab_cache_page() inside a JBD
transaction.  This is wrong, because transactions nest inside lock_page().

The deadlock is against shrink_list->ext3_journalled_writepage->journal_start.

This was not noticed before because we never used to journal writepage() data
in journalled-data mode, and because the deadlock against
generic_file_write() is covered up by i_sem.

Rework things so that we lock the page prior to starting a transaction.
parent b70732ef
...@@ -1662,33 +1662,21 @@ void ext3_set_aops(struct inode *inode) ...@@ -1662,33 +1662,21 @@ void ext3_set_aops(struct inode *inode)
* This required during truncate. We need to physically zero the tail end * This required during truncate. We need to physically zero the tail end
* of that block so it doesn't yield old data if the file is later grown. * of that block so it doesn't yield old data if the file is later grown.
*/ */
static int ext3_block_truncate_page(handle_t *handle, static int ext3_block_truncate_page(handle_t *handle, struct page *page,
struct address_space *mapping, loff_t from) struct address_space *mapping, loff_t from)
{ {
unsigned long index = from >> PAGE_CACHE_SHIFT; unsigned long index = from >> PAGE_CACHE_SHIFT;
unsigned offset = from & (PAGE_CACHE_SIZE-1); unsigned offset = from & (PAGE_CACHE_SIZE-1);
unsigned blocksize, iblock, length, pos; unsigned blocksize, iblock, length, pos;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
struct page *page;
struct buffer_head *bh; struct buffer_head *bh;
int err; int err;
void *kaddr; void *kaddr;
blocksize = inode->i_sb->s_blocksize; blocksize = inode->i_sb->s_blocksize;
length = offset & (blocksize - 1); length = blocksize - (offset & (blocksize - 1));
/* Block boundary? Nothing to do */
if (!length)
return 0;
length = blocksize - length;
iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
page = grab_cache_page(mapping, index);
err = -ENOMEM;
if (!page)
goto out;
if (!page_has_buffers(page)) if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0); create_empty_buffers(page, blocksize, 0);
...@@ -1756,7 +1744,6 @@ static int ext3_block_truncate_page(handle_t *handle, ...@@ -1756,7 +1744,6 @@ static int ext3_block_truncate_page(handle_t *handle,
unlock: unlock:
unlock_page(page); unlock_page(page);
page_cache_release(page); page_cache_release(page);
out:
return err; return err;
} }
...@@ -2137,13 +2124,15 @@ void ext3_truncate(struct inode * inode) ...@@ -2137,13 +2124,15 @@ void ext3_truncate(struct inode * inode)
struct ext3_inode_info *ei = EXT3_I(inode); struct ext3_inode_info *ei = EXT3_I(inode);
u32 *i_data = ei->i_data; u32 *i_data = ei->i_data;
int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
struct address_space *mapping = inode->i_mapping;
int offsets[4]; int offsets[4];
Indirect chain[4]; Indirect chain[4];
Indirect *partial; Indirect *partial;
int nr = 0; int nr = 0;
int n; int n;
long last_block; long last_block;
unsigned blocksize; unsigned blocksize = inode->i_sb->s_blocksize;
struct page *page;
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode))) S_ISLNK(inode->i_mode)))
...@@ -2155,16 +2144,36 @@ void ext3_truncate(struct inode * inode) ...@@ -2155,16 +2144,36 @@ void ext3_truncate(struct inode * inode)
ext3_discard_prealloc(inode); ext3_discard_prealloc(inode);
/*
* We have to lock the EOF page here, because lock_page() nests
* outside journal_start().
*/
if ((inode->i_size & (blocksize - 1)) == 0) {
/* Block boundary? Nothing to do */
page = NULL;
} else {
page = grab_cache_page(mapping,
inode->i_size >> PAGE_CACHE_SHIFT);
if (!page)
return;
}
handle = start_transaction(inode); handle = start_transaction(inode);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
if (page) {
clear_highpage(page);
flush_dcache_page(page);
unlock_page(page);
page_cache_release(page);
}
return; /* AKPM: return what? */ return; /* AKPM: return what? */
} }
blocksize = inode->i_sb->s_blocksize;
last_block = (inode->i_size + blocksize-1) last_block = (inode->i_size + blocksize-1)
>> EXT3_BLOCK_SIZE_BITS(inode->i_sb); >> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
ext3_block_truncate_page(handle, inode->i_mapping, inode->i_size); if (page)
ext3_block_truncate_page(handle, page, mapping, inode->i_size);
n = ext3_block_to_path(inode, last_block, offsets, NULL); n = ext3_block_to_path(inode, last_block, offsets, NULL);
if (n == 0) if (n == 0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment