Commit 2fd1d2c4 authored by Eric W. Biederman

proc: Fix proc_sys_prune_dcache to hold a sb reference

Andrei Vagin writes:
FYI: This bug has been reproduced on 4.11.7
> BUG: Dentry ffff895a3dd01240{i=4e7c09a,n=lo}  still in use (1) [unmount of proc proc]
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 13588 at fs/dcache.c:1445 umount_check+0x6e/0x80
> CPU: 1 PID: 13588 Comm: kworker/1:1 Not tainted 4.11.7-200.fc25.x86_64 #1
> Hardware name: CompuLab sbc-flt1/fitlet, BIOS SBCFLT_0.08.04 06/27/2015
> Workqueue: events proc_cleanup_work
> Call Trace:
>  dump_stack+0x63/0x86
>  __warn+0xcb/0xf0
>  warn_slowpath_null+0x1d/0x20
>  umount_check+0x6e/0x80
>  d_walk+0xc6/0x270
>  ? dentry_free+0x80/0x80
>  do_one_tree+0x26/0x40
>  shrink_dcache_for_umount+0x2d/0x90
>  generic_shutdown_super+0x1f/0xf0
>  kill_anon_super+0x12/0x20
>  proc_kill_sb+0x40/0x50
>  deactivate_locked_super+0x43/0x70
>  deactivate_super+0x5a/0x60
>  cleanup_mnt+0x3f/0x90
>  mntput_no_expire+0x13b/0x190
>  kern_unmount+0x3e/0x50
>  pid_ns_release_proc+0x15/0x20
>  proc_cleanup_work+0x15/0x20
>  process_one_work+0x197/0x450
>  worker_thread+0x4e/0x4a0
>  kthread+0x109/0x140
>  ? process_one_work+0x450/0x450
>  ? kthread_park+0x90/0x90
>  ret_from_fork+0x2c/0x40
> ---[ end trace e1c109611e5d0b41 ]---
> VFS: Busy inodes after unmount of proc. Self-destruct in 5 seconds.  Have a nice day...
> BUG: unable to handle kernel NULL pointer dereference at           (null)
> IP: _raw_spin_lock+0xc/0x30
> PGD 0

Fix this by taking a reference to the super block in proc_sys_prune_dcache.

The superblock reference is the core of the fix; however, the sysctl_inodes
list is converted to a hlist so that hlist_del_init_rcu may be used.  This
allows proc_sys_prune_dcache to remove inodes from the sysctl_inodes list, while
not causing problems for proc_sys_evict_inode if it later chooses to
remove the inode from the sysctl_inodes list.  Removing inodes from the
sysctl_inodes list allows proc_sys_prune_dcache to have a progress
guarantee, while still being able to drop all locks.  The fact that
head->unregistering is set in start_unregistering ensures that no more
inodes will be added to the sysctl_inodes list.

Previously the code did a dance where it delayed calling iput until the
next entry in the list was being considered to ensure the inode remained on
the sysctl_inodes list until the next entry was walked to.  The structure
of the loop in this patch does not need that so is much easier to
understand and maintain.

Cc: stable@vger.kernel.org
Reported-by: Andrei Vagin <avagin@gmail.com>
Tested-by: Andrei Vagin <avagin@openvz.org>
Fixes: ace0c791 ("proc/sysctl: Don't grab i_lock under sysctl_lock.")
Fixes: d6cffbbe ("proc/sysctl: prune stale dentries during unregistering")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
parent 296990de
...@@ -67,7 +67,7 @@ struct proc_inode { ...@@ -67,7 +67,7 @@ struct proc_inode {
struct proc_dir_entry *pde; struct proc_dir_entry *pde;
struct ctl_table_header *sysctl; struct ctl_table_header *sysctl;
struct ctl_table *sysctl_entry; struct ctl_table *sysctl_entry;
struct list_head sysctl_inodes; struct hlist_node sysctl_inodes;
const struct proc_ns_operations *ns_ops; const struct proc_ns_operations *ns_ops;
struct inode vfs_inode; struct inode vfs_inode;
}; };
......
...@@ -191,7 +191,7 @@ static void init_header(struct ctl_table_header *head, ...@@ -191,7 +191,7 @@ static void init_header(struct ctl_table_header *head,
head->set = set; head->set = set;
head->parent = NULL; head->parent = NULL;
head->node = node; head->node = node;
INIT_LIST_HEAD(&head->inodes); INIT_HLIST_HEAD(&head->inodes);
if (node) { if (node) {
struct ctl_table *entry; struct ctl_table *entry;
for (entry = table; entry->procname; entry++, node++) for (entry = table; entry->procname; entry++, node++)
...@@ -261,25 +261,42 @@ static void unuse_table(struct ctl_table_header *p) ...@@ -261,25 +261,42 @@ static void unuse_table(struct ctl_table_header *p)
complete(p->unregistering); complete(p->unregistering);
} }
/* called under sysctl_lock */
static void proc_sys_prune_dcache(struct ctl_table_header *head) static void proc_sys_prune_dcache(struct ctl_table_header *head)
{ {
struct inode *inode, *prev = NULL; struct inode *inode;
struct proc_inode *ei; struct proc_inode *ei;
struct hlist_node *node;
struct super_block *sb;
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) { for (;;) {
inode = igrab(&ei->vfs_inode); node = hlist_first_rcu(&head->inodes);
if (inode) { if (!node)
break;
ei = hlist_entry(node, struct proc_inode, sysctl_inodes);
spin_lock(&sysctl_lock);
hlist_del_init_rcu(&ei->sysctl_inodes);
spin_unlock(&sysctl_lock);
inode = &ei->vfs_inode;
sb = inode->i_sb;
if (!atomic_inc_not_zero(&sb->s_active))
continue;
inode = igrab(inode);
rcu_read_unlock(); rcu_read_unlock();
iput(prev); if (unlikely(!inode)) {
prev = inode; deactivate_super(sb);
d_prune_aliases(inode);
rcu_read_lock(); rcu_read_lock();
continue;
} }
d_prune_aliases(inode);
iput(inode);
deactivate_super(sb);
rcu_read_lock();
} }
rcu_read_unlock(); rcu_read_unlock();
iput(prev);
} }
/* called under sysctl_lock, will reacquire if has to wait */ /* called under sysctl_lock, will reacquire if has to wait */
...@@ -461,7 +478,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, ...@@ -461,7 +478,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
} }
ei->sysctl = head; ei->sysctl = head;
ei->sysctl_entry = table; ei->sysctl_entry = table;
list_add_rcu(&ei->sysctl_inodes, &head->inodes); hlist_add_head_rcu(&ei->sysctl_inodes, &head->inodes);
head->count++; head->count++;
spin_unlock(&sysctl_lock); spin_unlock(&sysctl_lock);
...@@ -489,7 +506,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, ...@@ -489,7 +506,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
{ {
spin_lock(&sysctl_lock); spin_lock(&sysctl_lock);
list_del_rcu(&PROC_I(inode)->sysctl_inodes); hlist_del_init_rcu(&PROC_I(inode)->sysctl_inodes);
if (!--head->count) if (!--head->count)
kfree_rcu(head, rcu); kfree_rcu(head, rcu);
spin_unlock(&sysctl_lock); spin_unlock(&sysctl_lock);
......
...@@ -143,7 +143,7 @@ struct ctl_table_header ...@@ -143,7 +143,7 @@ struct ctl_table_header
struct ctl_table_set *set; struct ctl_table_set *set;
struct ctl_dir *parent; struct ctl_dir *parent;
struct ctl_node *node; struct ctl_node *node;
struct list_head inodes; /* head for proc_inode->sysctl_inodes */ struct hlist_head inodes; /* head for proc_inode->sysctl_inodes */
}; };
struct ctl_dir { struct ctl_dir {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment