Commit edab9510 authored by Tejun Heo's avatar Tejun Heo

cgroup: Merge branch 'memcg_event' into for-3.14

Merge v3.12 based patch series to move cgroup_event implementation to
memcg into for-3.14.  The following two commits cause a conflict in
kernel/cgroup.c

  2ff2a7d0 ("cgroup: kill css_id")
  79bd9814 ("cgroup, memcg: move cgroup_event implementation to memcg")

Each patch removes a struct definition from kernel/cgroup.c.  As the
two are adjacent, they cause a context conflict.  Easily resolved by
removing both structs.
Signed-off-by: default avatarTejun Heo <tj@kernel.org>
parents e5fca243 b36824c7
......@@ -24,7 +24,6 @@ CONTENTS:
2.1 Basic Usage
2.2 Attaching processes
2.3 Mounting hierarchies by name
2.4 Notification API
3. Kernel API
3.1 Overview
3.2 Synchronization
......@@ -472,25 +471,6 @@ you give a subsystem a name.
The name of the subsystem appears as part of the hierarchy description
in /proc/mounts and /proc/<pid>/cgroups.
2.4 Notification API
--------------------
There is mechanism which allows to get notifications about changing
status of a cgroup.
To register a new notification handler you need to:
- create a file descriptor for event notification using eventfd(2);
- open a control file to be monitored (e.g. memory.usage_in_bytes);
- write "<event_fd> <control_fd> <args>" to cgroup.event_control.
Interpretation of args is defined by control file implementation;
eventfd will be woken up by control file implementation or when the
cgroup is removed.
To unregister a notification handler just close eventfd.
NOTE: Support of notifications should be implemented for the control
file. See documentation for the subsystem.
3. Kernel API
=============
......
......@@ -29,7 +29,6 @@ struct cgroup_subsys;
struct inode;
struct cgroup;
struct css_id;
struct eventfd_ctx;
extern int cgroup_init_early(void);
extern int cgroup_init(void);
......@@ -239,10 +238,6 @@ struct cgroup {
struct rcu_head rcu_head;
struct work_struct destroy_work;
/* List of events which userspace want to receive */
struct list_head event_list;
spinlock_t event_list_lock;
/* directory xattrs */
struct simple_xattrs xattrs;
};
......@@ -506,25 +501,6 @@ struct cftype {
int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
int (*release)(struct inode *inode, struct file *file);
/*
* register_event() callback will be used to add new userspace
* waiter for changes related to the cftype. Implement it if
* you want to provide this functionality. Use eventfd_signal()
* on eventfd to send notification to userspace.
*/
int (*register_event)(struct cgroup_subsys_state *css,
struct cftype *cft, struct eventfd_ctx *eventfd,
const char *args);
/*
* unregister_event() callback will be called when userspace
* closes the eventfd or on cgroup removing.
* This callback must be implemented, if you want provide
* notification functionality.
*/
void (*unregister_event)(struct cgroup_subsys_state *css,
struct cftype *cft,
struct eventfd_ctx *eventfd);
};
/*
......
......@@ -7,6 +7,7 @@
#include <linux/gfp.h>
#include <linux/types.h>
#include <linux/cgroup.h>
#include <linux/eventfd.h>
struct vmpressure {
unsigned long scanned;
......@@ -33,13 +34,10 @@ extern void vmpressure_init(struct vmpressure *vmpr);
extern void vmpressure_cleanup(struct vmpressure *vmpr);
extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr);
extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css);
extern int vmpressure_register_event(struct cgroup_subsys_state *css,
struct cftype *cft,
extern int vmpressure_register_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd,
const char *args);
extern void vmpressure_unregister_event(struct cgroup_subsys_state *css,
struct cftype *cft,
extern void vmpressure_unregister_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd);
#else
static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
......
......@@ -848,7 +848,6 @@ config NUMA_BALANCING
menuconfig CGROUPS
boolean "Control Group support"
depends on EVENTFD
help
This option adds support for grouping sets of processes together, for
use with process control subsystems such as Cpusets, CFS, memory
......@@ -915,6 +914,7 @@ config MEMCG
bool "Memory Resource Controller for Control Groups"
depends on RESOURCE_COUNTERS
select MM_OWNER
select EVENTFD
help
Provides a memory resource controller that manages both anonymous
memory and page cache. (See Documentation/cgroups/memory.txt)
......@@ -1154,7 +1154,6 @@ config UIDGID_STRICT_TYPE_CHECKS
config SCHED_AUTOGROUP
bool "Automatic process group scheduling"
select EVENTFD
select CGROUPS
select CGROUP_SCHED
select FAIR_GROUP_SCHED
......
......@@ -56,11 +56,8 @@
#include <linux/pid_namespace.h>
#include <linux/idr.h>
#include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */
#include <linux/eventfd.h>
#include <linux/poll.h>
#include <linux/flex_array.h> /* used in cgroup_attach_task */
#include <linux/kthread.h>
#include <linux/file.h>
#include <linux/atomic.h>
......@@ -132,36 +129,6 @@ struct cfent {
struct simple_xattrs xattrs;
};
/*
* cgroup_event represents events which userspace want to receive.
*/
struct cgroup_event {
/*
* css which the event belongs to.
*/
struct cgroup_subsys_state *css;
/*
* Control file which the event associated.
*/
struct cftype *cft;
/*
* eventfd to signal userspace about the event.
*/
struct eventfd_ctx *eventfd;
/*
* Each of these stored in a list by the cgroup.
*/
struct list_head list;
/*
* All fields below needed to unregister event when
* userspace closes eventfd.
*/
poll_table pt;
wait_queue_head_t *wqh;
wait_queue_t wait;
struct work_struct remove;
};
/* The list of hierarchy roots */
static LIST_HEAD(cgroup_roots);
......@@ -1351,8 +1318,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
INIT_LIST_HEAD(&cgrp->pidlists);
mutex_init(&cgrp->pidlist_mutex);
cgrp->dummy_css.cgroup = cgrp;
INIT_LIST_HEAD(&cgrp->event_list);
spin_lock_init(&cgrp->event_list_lock);
simple_xattrs_init(&cgrp->xattrs);
}
......@@ -2626,16 +2591,6 @@ static const struct inode_operations cgroup_dir_inode_operations = {
.removexattr = cgroup_removexattr,
};
/*
* Check if a file is a control file
*/
static inline struct cftype *__file_cft(struct file *file)
{
if (file_inode(file)->i_fop != &cgroup_file_operations)
return ERR_PTR(-EINVAL);
return __d_cft(file->f_dentry);
}
static int cgroup_create_file(struct dentry *dentry, umode_t mode,
struct super_block *sb)
{
......@@ -3915,202 +3870,6 @@ static void cgroup_dput(struct cgroup *cgrp)
deactivate_super(sb);
}
/*
* Unregister event and free resources.
*
* Gets called from workqueue.
*/
static void cgroup_event_remove(struct work_struct *work)
{
struct cgroup_event *event = container_of(work, struct cgroup_event,
remove);
struct cgroup_subsys_state *css = event->css;
remove_wait_queue(event->wqh, &event->wait);
event->cft->unregister_event(css, event->cft, event->eventfd);
/* Notify userspace the event is going away. */
eventfd_signal(event->eventfd, 1);
eventfd_ctx_put(event->eventfd);
kfree(event);
css_put(css);
}
/*
* Gets called on POLLHUP on eventfd when user closes it.
*
* Called with wqh->lock held and interrupts disabled.
*/
static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
int sync, void *key)
{
struct cgroup_event *event = container_of(wait,
struct cgroup_event, wait);
struct cgroup *cgrp = event->css->cgroup;
unsigned long flags = (unsigned long)key;
if (flags & POLLHUP) {
/*
* If the event has been detached at cgroup removal, we
* can simply return knowing the other side will cleanup
* for us.
*
* We can't race against event freeing since the other
* side will require wqh->lock via remove_wait_queue(),
* which we hold.
*/
spin_lock(&cgrp->event_list_lock);
if (!list_empty(&event->list)) {
list_del_init(&event->list);
/*
* We are in atomic context, but cgroup_event_remove()
* may sleep, so we have to call it in workqueue.
*/
schedule_work(&event->remove);
}
spin_unlock(&cgrp->event_list_lock);
}
return 0;
}
static void cgroup_event_ptable_queue_proc(struct file *file,
wait_queue_head_t *wqh, poll_table *pt)
{
struct cgroup_event *event = container_of(pt,
struct cgroup_event, pt);
event->wqh = wqh;
add_wait_queue(wqh, &event->wait);
}
/*
* Parse input and register new cgroup event handler.
*
* Input must be in format '<event_fd> <control_fd> <args>'.
* Interpretation of args is defined by control file implementation.
*/
static int cgroup_write_event_control(struct cgroup_subsys_state *dummy_css,
struct cftype *cft, const char *buffer)
{
struct cgroup *cgrp = dummy_css->cgroup;
struct cgroup_event *event;
struct cgroup_subsys_state *cfile_css;
unsigned int efd, cfd;
struct fd efile;
struct fd cfile;
char *endp;
int ret;
efd = simple_strtoul(buffer, &endp, 10);
if (*endp != ' ')
return -EINVAL;
buffer = endp + 1;
cfd = simple_strtoul(buffer, &endp, 10);
if ((*endp != ' ') && (*endp != '\0'))
return -EINVAL;
buffer = endp + 1;
event = kzalloc(sizeof(*event), GFP_KERNEL);
if (!event)
return -ENOMEM;
INIT_LIST_HEAD(&event->list);
init_poll_funcptr(&event->pt, cgroup_event_ptable_queue_proc);
init_waitqueue_func_entry(&event->wait, cgroup_event_wake);
INIT_WORK(&event->remove, cgroup_event_remove);
efile = fdget(efd);
if (!efile.file) {
ret = -EBADF;
goto out_kfree;
}
event->eventfd = eventfd_ctx_fileget(efile.file);
if (IS_ERR(event->eventfd)) {
ret = PTR_ERR(event->eventfd);
goto out_put_efile;
}
cfile = fdget(cfd);
if (!cfile.file) {
ret = -EBADF;
goto out_put_eventfd;
}
/* the process need read permission on control file */
/* AV: shouldn't we check that it's been opened for read instead? */
ret = inode_permission(file_inode(cfile.file), MAY_READ);
if (ret < 0)
goto out_put_cfile;
event->cft = __file_cft(cfile.file);
if (IS_ERR(event->cft)) {
ret = PTR_ERR(event->cft);
goto out_put_cfile;
}
if (!event->cft->ss) {
ret = -EBADF;
goto out_put_cfile;
}
/*
* Determine the css of @cfile, verify it belongs to the same
* cgroup as cgroup.event_control, and associate @event with it.
* Remaining events are automatically removed on cgroup destruction
* but the removal is asynchronous, so take an extra ref.
*/
rcu_read_lock();
ret = -EINVAL;
event->css = cgroup_css(cgrp, event->cft->ss);
cfile_css = css_from_dir(cfile.file->f_dentry->d_parent, event->cft->ss);
if (event->css && event->css == cfile_css && css_tryget(event->css))
ret = 0;
rcu_read_unlock();
if (ret)
goto out_put_cfile;
if (!event->cft->register_event || !event->cft->unregister_event) {
ret = -EINVAL;
goto out_put_css;
}
ret = event->cft->register_event(event->css, event->cft,
event->eventfd, buffer);
if (ret)
goto out_put_css;
efile.file->f_op->poll(efile.file, &event->pt);
spin_lock(&cgrp->event_list_lock);
list_add(&event->list, &cgrp->event_list);
spin_unlock(&cgrp->event_list_lock);
fdput(cfile);
fdput(efile);
return 0;
out_put_css:
css_put(event->css);
out_put_cfile:
fdput(cfile);
out_put_eventfd:
eventfd_ctx_put(event->eventfd);
out_put_efile:
fdput(efile);
out_kfree:
kfree(event);
return ret;
}
static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
......@@ -4135,11 +3894,6 @@ static struct cftype cgroup_base_files[] = {
.release = cgroup_pidlist_release,
.mode = S_IRUGO | S_IWUSR,
},
{
.name = "cgroup.event_control",
.write_string = cgroup_write_event_control,
.mode = S_IWUGO,
},
{
.name = "cgroup.clone_children",
.flags = CFTYPE_INSANE,
......@@ -4610,7 +4364,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
{
struct dentry *d = cgrp->dentry;
struct cgroup_event *event, *tmp;
struct cgroup_subsys *ss;
struct cgroup *child;
bool empty;
......@@ -4685,18 +4438,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
dget(d);
cgroup_d_remove_dir(d);
/*
* Unregister events and notify userspace.
* Notify userspace about cgroup removing only after rmdir of cgroup
* directory to avoid race between userspace and kernelspace.
*/
spin_lock(&cgrp->event_list_lock);
list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
list_del_init(&event->list);
schedule_work(&event->remove);
}
spin_unlock(&cgrp->event_list_lock);
return 0;
};
......
This diff is collapsed.
......@@ -278,8 +278,7 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
/**
* vmpressure_register_event() - Bind vmpressure notifications to an eventfd
* @css: css that is interested in vmpressure notifications
* @cft: cgroup control files handle
* @memcg: memcg that is interested in vmpressure notifications
* @eventfd: eventfd context to link notifications with
* @args: event arguments (used to set up a pressure level threshold)
*
......@@ -289,15 +288,12 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
* threshold (one of vmpressure_str_levels, i.e. "low", "medium", or
* "critical").
*
* This function should not be used directly, just pass it to (struct
* cftype).register_event, and then cgroup core will handle everything by
* itself.
* To be used as memcg event method.
*/
int vmpressure_register_event(struct cgroup_subsys_state *css,
struct cftype *cft, struct eventfd_ctx *eventfd,
const char *args)
int vmpressure_register_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd, const char *args)
{
struct vmpressure *vmpr = css_to_vmpressure(css);
struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
struct vmpressure_event *ev;
int level;
......@@ -325,23 +321,19 @@ int vmpressure_register_event(struct cgroup_subsys_state *css,
/**
* vmpressure_unregister_event() - Unbind eventfd from vmpressure
* @css: css handle
* @cft: cgroup control files handle
* @memcg: memcg handle
* @eventfd: eventfd context that was used to link vmpressure with the @cg
*
* This function does internal manipulations to detach the @eventfd from
* the vmpressure notifications, and then frees internal resources
* associated with the @eventfd (but the @eventfd itself is not freed).
*
* This function should not be used directly, just pass it to (struct
* cftype).unregister_event, and then cgroup core will handle everything
* by itself.
* To be used as memcg event method.
*/
void vmpressure_unregister_event(struct cgroup_subsys_state *css,
struct cftype *cft,
void vmpressure_unregister_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd)
{
struct vmpressure *vmpr = css_to_vmpressure(css);
struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
struct vmpressure_event *ev;
mutex_lock(&vmpr->events_lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment