Commit c2b6d621 authored by Al Viro

new primitive: discard_new_inode()

	We don't want open-by-handle picking up a half-set-up in-core
struct inode from e.g. mkdir() having failed halfway through.
In other words, we don't want such inodes returned by iget_locked()
on their way to extinction.  However, we can't just have them
unhashed - otherwise open-by-handle immediately *after* that would've
ended up creating a new in-core inode over the on-disk one that
is in the process of being freed right under us.

	Solution: new flag (I_CREATING) set by insert_inode_locked() and
removed by unlock_new_inode() and a new primitive (discard_new_inode())
to be used by such halfway-through-setup failure exits instead of
unlock_new_inode() / iput() combinations.  That primitive unlocks new
inode, but leaves I_CREATING in place.

	iget_locked() treats finding an I_CREATING inode as failure
(-ESTALE, once we sort out the error propagation).
	insert_inode_locked() treats the same as instant -EBUSY.
	ilookup() treats those as icache miss.

[Fix by Dan Carpenter <dan.carpenter@oracle.com> folded in]
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent c971e6a0
...@@ -1892,7 +1892,7 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode) ...@@ -1892,7 +1892,7 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
__d_instantiate(entry, inode); __d_instantiate(entry, inode);
WARN_ON(!(inode->i_state & I_NEW)); WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW; inode->i_state &= ~I_NEW & ~I_CREATING;
smp_mb(); smp_mb();
wake_up_bit(&inode->i_state, __I_NEW); wake_up_bit(&inode->i_state, __I_NEW);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
......
...@@ -804,6 +804,10 @@ static struct inode *find_inode(struct super_block *sb, ...@@ -804,6 +804,10 @@ static struct inode *find_inode(struct super_block *sb,
__wait_on_freeing_inode(inode); __wait_on_freeing_inode(inode);
goto repeat; goto repeat;
} }
if (unlikely(inode->i_state & I_CREATING)) {
spin_unlock(&inode->i_lock);
return ERR_PTR(-ESTALE);
}
__iget(inode); __iget(inode);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
return inode; return inode;
...@@ -831,6 +835,10 @@ static struct inode *find_inode_fast(struct super_block *sb, ...@@ -831,6 +835,10 @@ static struct inode *find_inode_fast(struct super_block *sb,
__wait_on_freeing_inode(inode); __wait_on_freeing_inode(inode);
goto repeat; goto repeat;
} }
if (unlikely(inode->i_state & I_CREATING)) {
spin_unlock(&inode->i_lock);
return ERR_PTR(-ESTALE);
}
__iget(inode); __iget(inode);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
return inode; return inode;
...@@ -961,13 +969,26 @@ void unlock_new_inode(struct inode *inode) ...@@ -961,13 +969,26 @@ void unlock_new_inode(struct inode *inode)
lockdep_annotate_inode_mutex_key(inode); lockdep_annotate_inode_mutex_key(inode);
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_NEW)); WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW; inode->i_state &= ~I_NEW & ~I_CREATING;
smp_mb(); smp_mb();
wake_up_bit(&inode->i_state, __I_NEW); wake_up_bit(&inode->i_state, __I_NEW);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
} }
EXPORT_SYMBOL(unlock_new_inode); EXPORT_SYMBOL(unlock_new_inode);
/**
 * discard_new_inode - give up on a new inode whose setup failed partway
 * @inode: new inode to discard; expected to still have I_NEW set
 *
 * Intended for failure exits taken halfway through setting up a new inode,
 * in place of an unlock_new_inode() / iput() pair.  Clears I_NEW, wakes
 * anything waiting on that bit, and then drops a reference with iput().
 *
 * Unlike unlock_new_inode(), I_CREATING is deliberately left untouched
 * here: only I_NEW is removed from i_state.
 */
void discard_new_inode(struct inode *inode)
{
lockdep_annotate_inode_mutex_key(inode);
/* i_state is modified under i_lock. */
spin_lock(&inode->i_lock);
/* Complain if the caller passed an inode that is not marked I_NEW. */
WARN_ON(!(inode->i_state & I_NEW));
/* Drop only I_NEW; any I_CREATING bit stays set. */
inode->i_state &= ~I_NEW;
/* Full barrier between the i_state update and the wake-up below. */
smp_mb();
wake_up_bit(&inode->i_state, __I_NEW);
spin_unlock(&inode->i_lock);
/* Release the reference only after i_lock has been dropped. */
iput(inode);
}
EXPORT_SYMBOL(discard_new_inode);
/** /**
* lock_two_nondirectories - take two i_mutexes on non-directory objects * lock_two_nondirectories - take two i_mutexes on non-directory objects
* *
...@@ -1039,6 +1060,8 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval, ...@@ -1039,6 +1060,8 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
* Use the old inode instead of the preallocated one. * Use the old inode instead of the preallocated one.
*/ */
spin_unlock(&inode_hash_lock); spin_unlock(&inode_hash_lock);
if (IS_ERR(old))
return NULL;
wait_on_inode(old); wait_on_inode(old);
if (unlikely(inode_unhashed(old))) { if (unlikely(inode_unhashed(old))) {
iput(old); iput(old);
...@@ -1128,6 +1151,8 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) ...@@ -1128,6 +1151,8 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
inode = find_inode_fast(sb, head, ino); inode = find_inode_fast(sb, head, ino);
spin_unlock(&inode_hash_lock); spin_unlock(&inode_hash_lock);
if (inode) { if (inode) {
if (IS_ERR(inode))
return NULL;
wait_on_inode(inode); wait_on_inode(inode);
if (unlikely(inode_unhashed(inode))) { if (unlikely(inode_unhashed(inode))) {
iput(inode); iput(inode);
...@@ -1165,6 +1190,8 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) ...@@ -1165,6 +1190,8 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
*/ */
spin_unlock(&inode_hash_lock); spin_unlock(&inode_hash_lock);
destroy_inode(inode); destroy_inode(inode);
if (IS_ERR(old))
return NULL;
inode = old; inode = old;
wait_on_inode(inode); wait_on_inode(inode);
if (unlikely(inode_unhashed(inode))) { if (unlikely(inode_unhashed(inode))) {
...@@ -1282,7 +1309,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, ...@@ -1282,7 +1309,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
inode = find_inode(sb, head, test, data); inode = find_inode(sb, head, test, data);
spin_unlock(&inode_hash_lock); spin_unlock(&inode_hash_lock);
return inode; return IS_ERR(inode) ? NULL : inode;
} }
EXPORT_SYMBOL(ilookup5_nowait); EXPORT_SYMBOL(ilookup5_nowait);
...@@ -1338,6 +1365,8 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino) ...@@ -1338,6 +1365,8 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
spin_unlock(&inode_hash_lock); spin_unlock(&inode_hash_lock);
if (inode) { if (inode) {
if (IS_ERR(inode))
return NULL;
wait_on_inode(inode); wait_on_inode(inode);
if (unlikely(inode_unhashed(inode))) { if (unlikely(inode_unhashed(inode))) {
iput(inode); iput(inode);
...@@ -1421,12 +1450,17 @@ int insert_inode_locked(struct inode *inode) ...@@ -1421,12 +1450,17 @@ int insert_inode_locked(struct inode *inode)
} }
if (likely(!old)) { if (likely(!old)) {
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
inode->i_state |= I_NEW; inode->i_state |= I_NEW | I_CREATING;
hlist_add_head(&inode->i_hash, head); hlist_add_head(&inode->i_hash, head);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
spin_unlock(&inode_hash_lock); spin_unlock(&inode_hash_lock);
return 0; return 0;
} }
if (unlikely(old->i_state & I_CREATING)) {
spin_unlock(&old->i_lock);
spin_unlock(&inode_hash_lock);
return -EBUSY;
}
__iget(old); __iget(old);
spin_unlock(&old->i_lock); spin_unlock(&old->i_lock);
spin_unlock(&inode_hash_lock); spin_unlock(&inode_hash_lock);
...@@ -1443,7 +1477,10 @@ EXPORT_SYMBOL(insert_inode_locked); ...@@ -1443,7 +1477,10 @@ EXPORT_SYMBOL(insert_inode_locked);
int insert_inode_locked4(struct inode *inode, unsigned long hashval, int insert_inode_locked4(struct inode *inode, unsigned long hashval,
int (*test)(struct inode *, void *), void *data) int (*test)(struct inode *, void *), void *data)
{ {
struct inode *old = inode_insert5(inode, hashval, test, NULL, data); struct inode *old;
inode->i_state |= I_CREATING;
old = inode_insert5(inode, hashval, test, NULL, data);
if (old != inode) { if (old != inode) {
iput(old); iput(old);
......
...@@ -2016,6 +2016,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) ...@@ -2016,6 +2016,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
* I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper * I_OVL_INUSE Used by overlayfs to get exclusive ownership on upper
* and work dirs among overlayfs mounts. * and work dirs among overlayfs mounts.
* *
* I_CREATING New object's inode in the middle of setting up.
*
* Q: What is the difference between I_WILL_FREE and I_FREEING? * Q: What is the difference between I_WILL_FREE and I_FREEING?
*/ */
#define I_DIRTY_SYNC (1 << 0) #define I_DIRTY_SYNC (1 << 0)
...@@ -2036,7 +2038,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) ...@@ -2036,7 +2038,8 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
#define __I_DIRTY_TIME_EXPIRED 12 #define __I_DIRTY_TIME_EXPIRED 12
#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED) #define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED)
#define I_WB_SWITCH (1 << 13) #define I_WB_SWITCH (1 << 13)
#define I_OVL_INUSE (1 << 14) #define I_OVL_INUSE (1 << 14)
#define I_CREATING (1 << 15)
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
...@@ -2919,6 +2922,7 @@ extern void lockdep_annotate_inode_mutex_key(struct inode *inode); ...@@ -2919,6 +2922,7 @@ extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }; static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { };
#endif #endif
extern void unlock_new_inode(struct inode *); extern void unlock_new_inode(struct inode *);
extern void discard_new_inode(struct inode *);
extern unsigned int get_next_ino(void); extern unsigned int get_next_ino(void);
extern void evict_inodes(struct super_block *sb); extern void evict_inodes(struct super_block *sb);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment