Commit 4ab78683 authored by Kirill A. Shutemov, committed by Linus Torvalds

cgroups: fix race between userspace and kernelspace

Notify userspace about cgroup removing only after rmdir of cgroup
directory to avoid race between userspace and kernelspace.

eventfds are used to notify about two types of events:
 - control file-specific, like crossing memory threshold;
 - cgroup removing.

To understand what really happened, userspace can check if the cgroup still
exists.  To avoid a race between userspace and kernelspace we have to
notify userspace about cgroup removal only after rmdir of the cgroup
directory.
Signed-off-by: Kirill A. Shutemov <kirill@shutemov.name>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Acked-by: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Cc: Dan Malek <dan@embeddedalley.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent daaf1e68
...@@ -795,28 +795,15 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) ...@@ -795,28 +795,15 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
static int cgroup_call_pre_destroy(struct cgroup *cgrp) static int cgroup_call_pre_destroy(struct cgroup *cgrp)
{ {
struct cgroup_subsys *ss; struct cgroup_subsys *ss;
struct cgroup_event *event, *tmp;
int ret = 0; int ret = 0;
for_each_subsys(cgrp->root, ss) for_each_subsys(cgrp->root, ss)
if (ss->pre_destroy) { if (ss->pre_destroy) {
ret = ss->pre_destroy(ss, cgrp); ret = ss->pre_destroy(ss, cgrp);
if (ret) if (ret)
goto out; break;
} }
/*
* Unregister events and notify userspace.
*/
spin_lock(&cgrp->event_list_lock);
list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
list_del(&event->list);
eventfd_signal(event->eventfd, 1);
schedule_work(&event->remove);
}
spin_unlock(&cgrp->event_list_lock);
out:
return ret; return ret;
} }
...@@ -3006,7 +2993,6 @@ static void cgroup_event_remove(struct work_struct *work) ...@@ -3006,7 +2993,6 @@ static void cgroup_event_remove(struct work_struct *work)
event->cft->unregister_event(cgrp, event->cft, event->eventfd); event->cft->unregister_event(cgrp, event->cft, event->eventfd);
eventfd_ctx_put(event->eventfd); eventfd_ctx_put(event->eventfd);
remove_wait_queue(event->wqh, &event->wait);
kfree(event); kfree(event);
} }
...@@ -3024,6 +3010,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, ...@@ -3024,6 +3010,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
unsigned long flags = (unsigned long)key; unsigned long flags = (unsigned long)key;
if (flags & POLLHUP) { if (flags & POLLHUP) {
remove_wait_queue_locked(event->wqh, &event->wait);
spin_lock(&cgrp->event_list_lock); spin_lock(&cgrp->event_list_lock);
list_del(&event->list); list_del(&event->list);
spin_unlock(&cgrp->event_list_lock); spin_unlock(&cgrp->event_list_lock);
...@@ -3472,6 +3459,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) ...@@ -3472,6 +3459,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
struct dentry *d; struct dentry *d;
struct cgroup *parent; struct cgroup *parent;
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
struct cgroup_event *event, *tmp;
int ret; int ret;
/* the vfs holds both inode->i_mutex already */ /* the vfs holds both inode->i_mutex already */
...@@ -3555,6 +3543,20 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) ...@@ -3555,6 +3543,20 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
set_bit(CGRP_RELEASABLE, &parent->flags); set_bit(CGRP_RELEASABLE, &parent->flags);
check_for_release(parent); check_for_release(parent);
/*
* Unregister events and notify userspace.
* Notify userspace about cgroup removing only after rmdir of cgroup
* directory to avoid race between userspace and kernelspace
*/
spin_lock(&cgrp->event_list_lock);
list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
list_del(&event->list);
remove_wait_queue(event->wqh, &event->wait);
eventfd_signal(event->eventfd, 1);
schedule_work(&event->remove);
}
spin_unlock(&cgrp->event_list_lock);
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment