Commit 08142579 authored by Jan Kara's avatar Jan Kara Committed by Linus Torvalds

mm: fix assertion mapping->nrpages == 0 in end_writeback()

Under heavy memory and filesystem load, users observe the assertion
mapping->nrpages == 0 in end_writeback() trigger.  This can be caused by
page reclaim reclaiming the last page from a mapping in the following
race:

	CPU0				CPU1
  ...
  shrink_page_list()
    __remove_mapping()
      __delete_from_page_cache()
        radix_tree_delete()
					evict_inode()
					  truncate_inode_pages()
					    truncate_inode_pages_range()
					      pagevec_lookup() - finds nothing
					  end_writeback()
					    mapping->nrpages != 0 -> BUG
        page->mapping = NULL
        mapping->nrpages--

Fix the problem by doing a reliable check of mapping->nrpages under
mapping->tree_lock in end_writeback().

Analyzed by Jay <jinshan.xiong@whamcloud.com>, lost in LKML, and dug out
by Miklos Szeredi <mszeredi@suse.de>.

Cc: Jay <jinshan.xiong@whamcloud.com>
Cc: Miklos Szeredi <mszeredi@suse.de>
Signed-off-by: default avatarJan Kara <jack@suse.cz>
Cc: <stable@kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 9b679320
...@@ -423,7 +423,14 @@ EXPORT_SYMBOL(remove_inode_hash); ...@@ -423,7 +423,14 @@ EXPORT_SYMBOL(remove_inode_hash);
void end_writeback(struct inode *inode) void end_writeback(struct inode *inode)
{ {
might_sleep(); might_sleep();
/*
* We have to cycle tree_lock here because reclaim can be still in the
* process of removing the last page (in __delete_from_page_cache())
* and we must not free mapping under it.
*/
spin_lock_irq(&inode->i_data.tree_lock);
BUG_ON(inode->i_data.nrpages); BUG_ON(inode->i_data.nrpages);
spin_unlock_irq(&inode->i_data.tree_lock);
BUG_ON(!list_empty(&inode->i_data.private_list)); BUG_ON(!list_empty(&inode->i_data.private_list));
BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(!(inode->i_state & I_FREEING));
BUG_ON(inode->i_state & I_CLEAR); BUG_ON(inode->i_state & I_CLEAR);
......
...@@ -639,6 +639,7 @@ struct address_space { ...@@ -639,6 +639,7 @@ struct address_space {
struct prio_tree_root i_mmap; /* tree of private and shared mappings */ struct prio_tree_root i_mmap; /* tree of private and shared mappings */
struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
struct mutex i_mmap_mutex; /* protect tree, count, list */ struct mutex i_mmap_mutex; /* protect tree, count, list */
/* Protected by tree_lock together with the radix tree */
unsigned long nrpages; /* number of total pages */ unsigned long nrpages; /* number of total pages */
pgoff_t writeback_index;/* writeback starts here */ pgoff_t writeback_index;/* writeback starts here */
const struct address_space_operations *a_ops; /* methods */ const struct address_space_operations *a_ops; /* methods */
......
...@@ -304,6 +304,11 @@ EXPORT_SYMBOL(truncate_inode_pages_range); ...@@ -304,6 +304,11 @@ EXPORT_SYMBOL(truncate_inode_pages_range);
* @lstart: offset from which to truncate * @lstart: offset from which to truncate
* *
* Called under (and serialised by) inode->i_mutex. * Called under (and serialised by) inode->i_mutex.
*
* Note: When this function returns, there can be a page in the process of
* deletion (inside __delete_from_page_cache()) in the specified range. Thus
* mapping->nrpages can be non-zero when this function returns even after
* truncation of the whole mapping.
*/ */
void truncate_inode_pages(struct address_space *mapping, loff_t lstart) void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment