Commit 991114c6, authored by Alexander Viro, committed by Linus Torvalds

[PATCH] fix for prune_icache()/forced final iput() races

Based on analysis and a patch from Russ Weight <rweight@us.ibm.com>

There is a race condition that can occur if an inode is allocated and then
released (using iput) during the ->fill_super functions.  The race
condition is between kswapd and mount.

For most filesystems this can only happen in an error path when kswapd is
running concurrently.  For isofs, however, the error can occur in a more
common code path (which is how the bug was found).

The logic here is "we want final iput() to free inode *now* instead of
letting it sit in cache if fs is going down or had not quite come up".  The
problem is with kswapd seeing such inodes in the middle of being killed and
happily taking over.

The clean solution would be to tell kswapd to leave those inodes alone and
let our final iput deal with them.  I.e.  add a new flag
(I_FORCED_FREEING; the diff in this commit names it I_WILL_FREE), set it
before write_inode_now() there and make prune_icache() leave those alone.
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent f972be33
...@@ -500,7 +500,7 @@ static struct inode * find_inode(struct super_block * sb, struct hlist_head *hea ...@@ -500,7 +500,7 @@ static struct inode * find_inode(struct super_block * sb, struct hlist_head *hea
continue; continue;
if (!test(inode, data)) if (!test(inode, data))
continue; continue;
if (inode->i_state & (I_FREEING|I_CLEAR)) { if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
__wait_on_freeing_inode(inode); __wait_on_freeing_inode(inode);
goto repeat; goto repeat;
} }
...@@ -525,7 +525,7 @@ static struct inode * find_inode_fast(struct super_block * sb, struct hlist_head ...@@ -525,7 +525,7 @@ static struct inode * find_inode_fast(struct super_block * sb, struct hlist_head
continue; continue;
if (inode->i_sb != sb) if (inode->i_sb != sb)
continue; continue;
if (inode->i_state & (I_FREEING|I_CLEAR)) { if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
__wait_on_freeing_inode(inode); __wait_on_freeing_inode(inode);
goto repeat; goto repeat;
} }
...@@ -727,7 +727,7 @@ EXPORT_SYMBOL(iunique); ...@@ -727,7 +727,7 @@ EXPORT_SYMBOL(iunique);
struct inode *igrab(struct inode *inode) struct inode *igrab(struct inode *inode)
{ {
spin_lock(&inode_lock); spin_lock(&inode_lock);
if (!(inode->i_state & I_FREEING)) if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
__iget(inode); __iget(inode);
else else
/* /*
...@@ -1024,17 +1024,21 @@ static void generic_forget_inode(struct inode *inode) ...@@ -1024,17 +1024,21 @@ static void generic_forget_inode(struct inode *inode)
if (!(inode->i_state & (I_DIRTY|I_LOCK))) if (!(inode->i_state & (I_DIRTY|I_LOCK)))
list_move(&inode->i_list, &inode_unused); list_move(&inode->i_list, &inode_unused);
inodes_stat.nr_unused++; inodes_stat.nr_unused++;
spin_unlock(&inode_lock); if (!sb || (sb->s_flags & MS_ACTIVE)) {
if (!sb || (sb->s_flags & MS_ACTIVE)) spin_unlock(&inode_lock);
return; return;
}
inode->i_state |= I_WILL_FREE;
spin_unlock(&inode_lock);
write_inode_now(inode, 1); write_inode_now(inode, 1);
spin_lock(&inode_lock); spin_lock(&inode_lock);
inode->i_state &= ~I_WILL_FREE;
inodes_stat.nr_unused--; inodes_stat.nr_unused--;
hlist_del_init(&inode->i_hash); hlist_del_init(&inode->i_hash);
} }
list_del_init(&inode->i_list); list_del_init(&inode->i_list);
list_del_init(&inode->i_sb_list); list_del_init(&inode->i_sb_list);
inode->i_state|=I_FREEING; inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--; inodes_stat.nr_inodes--;
spin_unlock(&inode_lock); spin_unlock(&inode_lock);
if (inode->i_data.nrpages) if (inode->i_data.nrpages)
......
...@@ -1025,6 +1025,7 @@ struct super_operations { ...@@ -1025,6 +1025,7 @@ struct super_operations {
#define I_FREEING 16 #define I_FREEING 16
#define I_CLEAR 32 #define I_CLEAR 32
#define I_NEW 64 #define I_NEW 64
#define I_WILL_FREE 128
#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment