Commit f075e0f6 authored by Linus Torvalds

Merge branch 'for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:
 "The bulk of changes are cleanups and preparations for the upcoming
  kernfs conversion.

   - The cgroup_event mechanism, which is and will be used only by
     memcg, is moved to memcg.

   - pidlist handling is updated so that it can be served by seq_file.

     Also, the list is no longer sorted under sane_behavior.  The
     cgroup documentation explicitly states that the file is unsorted,
     but it has in fact been sorted for quite some time.

   - All cgroup file handling now happens on top of seq_file.  This is
     to prepare for kernfs conversion.  In addition, all operations are
     restructured so that they map 1-1 to kernfs operations.

   - Other cleanups and low-pri fixes"
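
The conversion repeated throughout the diffs below is mechanical: show
handlers drop their explicit css/cftype parameters and become plain
seq_file show methods, recovering both objects from the seq_file via
the new seq_css() and seq_cft() helpers.  A minimal before/after sketch
(foo_css() and its value field are hypothetical placeholders, not code
from this series):

#include <linux/cgroup.h>
#include <linux/seq_file.h>

/* Before: cgroup core passed the css and cftype in explicitly. */
static int foo_show(struct cgroup_subsys_state *css, struct cftype *cft,
		    struct seq_file *sf)
{
	seq_printf(sf, "%llu\n", foo_css(css)->value);
	return 0;
}

/* After: a bare seq_file show method; the css (and, where needed, the
 * cftype) are recovered from sf->private via the helpers added to
 * include/linux/cgroup.h in this series. */
static int foo_seq_show(struct seq_file *sf, void *v)
{
	seq_printf(sf, "%llu\n", foo_css(seq_css(sf))->value);
	return 0;
}

In the cftype tables, ".read_seq_string = foo_show" correspondingly
becomes ".seq_show = foo_seq_show", which maps 1:1 to a kernfs seq_file
operation.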

* 'for-3.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (40 commits)
  cgroup: trivial style updates
  cgroup: remove stray references to css_id
  doc: cgroups: Fix typo in doc/cgroups
  cgroup: fix fail path in cgroup_load_subsys()
  cgroup: fix missing unlock on error in cgroup_load_subsys()
  cgroup: remove for_each_root_subsys()
  cgroup: implement for_each_css()
  cgroup: factor out cgroup_subsys_state creation into create_css()
  cgroup: combine css handling loops in cgroup_create()
  cgroup: reorder operations in cgroup_create()
  cgroup: make for_each_subsys() useable under cgroup_root_mutex
  cgroup: css iterations and css_from_dir() are safe under cgroup_mutex
  cgroup: unify pidlist and other file handling
  cgroup: replace cftype->read_seq_string() with cftype->seq_show()
  cgroup: attach cgroup_open_file to all cgroup files
  cgroup: generalize cgroup_pidlist_open_file
  cgroup: unify read path so that seq_file is always used
  cgroup: unify cgroup_write_X64() and cgroup_write_string()
  cgroup: remove cftype->read(), ->read_map() and ->write()
  hugetlb_cgroup: convert away from cftype->read()
  ...
parents 5cb7398c dd4b0a46
......@@ -24,7 +24,6 @@ CONTENTS:
2.1 Basic Usage
2.2 Attaching processes
2.3 Mounting hierarchies by name
2.4 Notification API
3. Kernel API
3.1 Overview
3.2 Synchronization
......@@ -472,25 +471,6 @@ you give a subsystem a name.
The name of the subsystem appears as part of the hierarchy description
in /proc/mounts and /proc/<pid>/cgroups.
2.4 Notification API
--------------------
There is mechanism which allows to get notifications about changing
status of a cgroup.
To register a new notification handler you need to:
- create a file descriptor for event notification using eventfd(2);
- open a control file to be monitored (e.g. memory.usage_in_bytes);
- write "<event_fd> <control_fd> <args>" to cgroup.event_control.
Interpretation of args is defined by control file implementation;
eventfd will be woken up by control file implementation or when the
cgroup is removed.
To unregister a notification handler just close eventfd.
NOTE: Support of notifications should be implemented for the control
file. See documentation for the subsystem.
3. Kernel API
=============
......
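For context, a minimal userspace sketch of the protocol described by
the section removed above (the mechanism lives on for memcg control
files).  The mount point /sys/fs/cgroup/memory and the 4 MiB usage
threshold passed as <args> are illustrative assumptions, and error
handling is omitted:

#include <stdint.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/eventfd.h>

int main(void)
{
	char buf[64];
	uint64_t count;

	/* 1. create a file descriptor for event notification */
	int efd = eventfd(0, 0);
	/* 2. open the control file to be monitored */
	int cfd = open("/sys/fs/cgroup/memory/memory.usage_in_bytes",
		       O_RDONLY);
	/* 3. write "<event_fd> <control_fd> <args>" to cgroup.event_control;
	 *    <args> here is a byte threshold, but its interpretation is
	 *    defined by the control file implementation */
	int ecfd = open("/sys/fs/cgroup/memory/cgroup.event_control",
			O_WRONLY);
	int n = snprintf(buf, sizeof(buf), "%d %d 4194304", efd, cfd);
	write(ecfd, buf, n);

	/* block until the control file implementation wakes the eventfd */
	read(efd, &count, sizeof(count));

	/* to unregister, just close the eventfd */
	close(efd);
	return 0;
}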
......@@ -577,7 +577,7 @@ Each memcg's numa_stat file includes "total", "file", "anon" and "unevictable"
per-node page counts including "hierarchical_<counter>" which sums up all
hierarchical children's values in addition to the memcg's own value.
The ouput format of memory.numa_stat is:
The output format of memory.numa_stat is:
total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
......@@ -670,7 +670,7 @@ page tables.
8.1 Interface
This feature is disabled by default. It can be enabledi (and disabled again) by
This feature is disabled by default. It can be enabled (and disabled again) by
writing to memory.move_charge_at_immigrate of the destination cgroup.
If you want to enable it:
......
......@@ -97,8 +97,8 @@ to work with it.
(struct res_counter *rc, struct res_counter *top,
unsinged long val)
Almost same as res_cunter_uncharge() but propagation of uncharge
stops when rc == top. This is useful when kill a res_coutner in
Almost same as res_counter_uncharge() but propagation of uncharge
stops when rc == top. This is useful when kill a res_counter in
child cgroup.
2.1 Other accounting routines
......
......@@ -1303,13 +1303,10 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf,
return __blkg_prfill_rwstat(sf, pd, &rwstat);
}
static int tg_print_cpu_rwstat(struct cgroup_subsys_state *css,
struct cftype *cft, struct seq_file *sf)
static int tg_print_cpu_rwstat(struct seq_file *sf, void *v)
{
struct blkcg *blkcg = css_to_blkcg(css);
blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkcg_policy_throtl,
cft->private, true);
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_cpu_rwstat,
&blkcg_policy_throtl, seq_cft(sf)->private, true);
return 0;
}
......@@ -1335,19 +1332,17 @@ static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd,
return __blkg_prfill_u64(sf, pd, v);
}
static int tg_print_conf_u64(struct cgroup_subsys_state *css,
struct cftype *cft, struct seq_file *sf)
static int tg_print_conf_u64(struct seq_file *sf, void *v)
{
blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_u64,
&blkcg_policy_throtl, cft->private, false);
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_u64,
&blkcg_policy_throtl, seq_cft(sf)->private, false);
return 0;
}
static int tg_print_conf_uint(struct cgroup_subsys_state *css,
struct cftype *cft, struct seq_file *sf)
static int tg_print_conf_uint(struct seq_file *sf, void *v)
{
blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_uint,
&blkcg_policy_throtl, cft->private, false);
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_uint,
&blkcg_policy_throtl, seq_cft(sf)->private, false);
return 0;
}
......@@ -1428,40 +1423,40 @@ static struct cftype throtl_files[] = {
{
.name = "throttle.read_bps_device",
.private = offsetof(struct throtl_grp, bps[READ]),
.read_seq_string = tg_print_conf_u64,
.seq_show = tg_print_conf_u64,
.write_string = tg_set_conf_u64,
.max_write_len = 256,
},
{
.name = "throttle.write_bps_device",
.private = offsetof(struct throtl_grp, bps[WRITE]),
.read_seq_string = tg_print_conf_u64,
.seq_show = tg_print_conf_u64,
.write_string = tg_set_conf_u64,
.max_write_len = 256,
},
{
.name = "throttle.read_iops_device",
.private = offsetof(struct throtl_grp, iops[READ]),
.read_seq_string = tg_print_conf_uint,
.seq_show = tg_print_conf_uint,
.write_string = tg_set_conf_uint,
.max_write_len = 256,
},
{
.name = "throttle.write_iops_device",
.private = offsetof(struct throtl_grp, iops[WRITE]),
.read_seq_string = tg_print_conf_uint,
.seq_show = tg_print_conf_uint,
.write_string = tg_set_conf_uint,
.max_write_len = 256,
},
{
.name = "throttle.io_service_bytes",
.private = offsetof(struct tg_stats_cpu, service_bytes),
.read_seq_string = tg_print_cpu_rwstat,
.seq_show = tg_print_cpu_rwstat,
},
{
.name = "throttle.io_serviced",
.private = offsetof(struct tg_stats_cpu, serviced),
.read_seq_string = tg_print_cpu_rwstat,
.seq_show = tg_print_cpu_rwstat,
},
{ } /* terminate */
};
......
......@@ -1632,11 +1632,11 @@ static u64 cfqg_prfill_weight_device(struct seq_file *sf,
return __blkg_prfill_u64(sf, pd, cfqg->dev_weight);
}
static int cfqg_print_weight_device(struct cgroup_subsys_state *css,
struct cftype *cft, struct seq_file *sf)
static int cfqg_print_weight_device(struct seq_file *sf, void *v)
{
blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_weight_device,
&blkcg_policy_cfq, 0, false);
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
cfqg_prfill_weight_device, &blkcg_policy_cfq,
0, false);
return 0;
}
......@@ -1650,26 +1650,23 @@ static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf,
return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight);
}
static int cfqg_print_leaf_weight_device(struct cgroup_subsys_state *css,
struct cftype *cft,
struct seq_file *sf)
static int cfqg_print_leaf_weight_device(struct seq_file *sf, void *v)
{
blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_leaf_weight_device,
&blkcg_policy_cfq, 0, false);
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq,
0, false);
return 0;
}
static int cfq_print_weight(struct cgroup_subsys_state *css, struct cftype *cft,
struct seq_file *sf)
static int cfq_print_weight(struct seq_file *sf, void *v)
{
seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_weight);
seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_weight);
return 0;
}
static int cfq_print_leaf_weight(struct cgroup_subsys_state *css,
struct cftype *cft, struct seq_file *sf)
static int cfq_print_leaf_weight(struct seq_file *sf, void *v)
{
seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_leaf_weight);
seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_leaf_weight);
return 0;
}
......@@ -1762,23 +1759,17 @@ static int cfq_set_leaf_weight(struct cgroup_subsys_state *css,
return __cfq_set_weight(css, cft, val, true);
}
static int cfqg_print_stat(struct cgroup_subsys_state *css, struct cftype *cft,
struct seq_file *sf)
static int cfqg_print_stat(struct seq_file *sf, void *v)
{
struct blkcg *blkcg = css_to_blkcg(css);
blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &blkcg_policy_cfq,
cft->private, false);
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
&blkcg_policy_cfq, seq_cft(sf)->private, false);
return 0;
}
static int cfqg_print_rwstat(struct cgroup_subsys_state *css,
struct cftype *cft, struct seq_file *sf)
static int cfqg_print_rwstat(struct seq_file *sf, void *v)
{
struct blkcg *blkcg = css_to_blkcg(css);
blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, &blkcg_policy_cfq,
cft->private, true);
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
&blkcg_policy_cfq, seq_cft(sf)->private, true);
return 0;
}
......@@ -1798,23 +1789,19 @@ static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf,
return __blkg_prfill_rwstat(sf, pd, &sum);
}
static int cfqg_print_stat_recursive(struct cgroup_subsys_state *css,
struct cftype *cft, struct seq_file *sf)
static int cfqg_print_stat_recursive(struct seq_file *sf, void *v)
{
struct blkcg *blkcg = css_to_blkcg(css);
blkcg_print_blkgs(sf, blkcg, cfqg_prfill_stat_recursive,
&blkcg_policy_cfq, cft->private, false);
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
cfqg_prfill_stat_recursive, &blkcg_policy_cfq,
seq_cft(sf)->private, false);
return 0;
}
static int cfqg_print_rwstat_recursive(struct cgroup_subsys_state *css,
struct cftype *cft, struct seq_file *sf)
static int cfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
{
struct blkcg *blkcg = css_to_blkcg(css);
blkcg_print_blkgs(sf, blkcg, cfqg_prfill_rwstat_recursive,
&blkcg_policy_cfq, cft->private, true);
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
cfqg_prfill_rwstat_recursive, &blkcg_policy_cfq,
seq_cft(sf)->private, true);
return 0;
}
......@@ -1835,13 +1822,11 @@ static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf,
}
/* print avg_queue_size */
static int cfqg_print_avg_queue_size(struct cgroup_subsys_state *css,
struct cftype *cft, struct seq_file *sf)
static int cfqg_print_avg_queue_size(struct seq_file *sf, void *v)
{
struct blkcg *blkcg = css_to_blkcg(css);
blkcg_print_blkgs(sf, blkcg, cfqg_prfill_avg_queue_size,
&blkcg_policy_cfq, 0, false);
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
cfqg_prfill_avg_queue_size, &blkcg_policy_cfq,
0, false);
return 0;
}
#endif /* CONFIG_DEBUG_BLK_CGROUP */
......@@ -1851,14 +1836,14 @@ static struct cftype cfq_blkcg_files[] = {
{
.name = "weight_device",
.flags = CFTYPE_ONLY_ON_ROOT,
.read_seq_string = cfqg_print_leaf_weight_device,
.seq_show = cfqg_print_leaf_weight_device,
.write_string = cfqg_set_leaf_weight_device,
.max_write_len = 256,
},
{
.name = "weight",
.flags = CFTYPE_ONLY_ON_ROOT,
.read_seq_string = cfq_print_leaf_weight,
.seq_show = cfq_print_leaf_weight,
.write_u64 = cfq_set_leaf_weight,
},
......@@ -1866,26 +1851,26 @@ static struct cftype cfq_blkcg_files[] = {
{
.name = "weight_device",
.flags = CFTYPE_NOT_ON_ROOT,
.read_seq_string = cfqg_print_weight_device,
.seq_show = cfqg_print_weight_device,
.write_string = cfqg_set_weight_device,
.max_write_len = 256,
},
{
.name = "weight",
.flags = CFTYPE_NOT_ON_ROOT,
.read_seq_string = cfq_print_weight,
.seq_show = cfq_print_weight,
.write_u64 = cfq_set_weight,
},
{
.name = "leaf_weight_device",
.read_seq_string = cfqg_print_leaf_weight_device,
.seq_show = cfqg_print_leaf_weight_device,
.write_string = cfqg_set_leaf_weight_device,
.max_write_len = 256,
},
{
.name = "leaf_weight",
.read_seq_string = cfq_print_leaf_weight,
.seq_show = cfq_print_leaf_weight,
.write_u64 = cfq_set_leaf_weight,
},
......@@ -1893,114 +1878,114 @@ static struct cftype cfq_blkcg_files[] = {
{
.name = "time",
.private = offsetof(struct cfq_group, stats.time),
.read_seq_string = cfqg_print_stat,
.seq_show = cfqg_print_stat,
},
{
.name = "sectors",
.private = offsetof(struct cfq_group, stats.sectors),
.read_seq_string = cfqg_print_stat,
.seq_show = cfqg_print_stat,
},
{
.name = "io_service_bytes",
.private = offsetof(struct cfq_group, stats.service_bytes),
.read_seq_string = cfqg_print_rwstat,
.seq_show = cfqg_print_rwstat,
},
{
.name = "io_serviced",
.private = offsetof(struct cfq_group, stats.serviced),
.read_seq_string = cfqg_print_rwstat,
.seq_show = cfqg_print_rwstat,
},
{
.name = "io_service_time",
.private = offsetof(struct cfq_group, stats.service_time),
.read_seq_string = cfqg_print_rwstat,
.seq_show = cfqg_print_rwstat,
},
{
.name = "io_wait_time",
.private = offsetof(struct cfq_group, stats.wait_time),
.read_seq_string = cfqg_print_rwstat,
.seq_show = cfqg_print_rwstat,
},
{
.name = "io_merged",
.private = offsetof(struct cfq_group, stats.merged),
.read_seq_string = cfqg_print_rwstat,
.seq_show = cfqg_print_rwstat,
},
{
.name = "io_queued",
.private = offsetof(struct cfq_group, stats.queued),
.read_seq_string = cfqg_print_rwstat,
.seq_show = cfqg_print_rwstat,
},
/* the same statictics which cover the cfqg and its descendants */
{
.name = "time_recursive",
.private = offsetof(struct cfq_group, stats.time),
.read_seq_string = cfqg_print_stat_recursive,
.seq_show = cfqg_print_stat_recursive,
},
{
.name = "sectors_recursive",
.private = offsetof(struct cfq_group, stats.sectors),
.read_seq_string = cfqg_print_stat_recursive,
.seq_show = cfqg_print_stat_recursive,
},
{
.name = "io_service_bytes_recursive",
.private = offsetof(struct cfq_group, stats.service_bytes),
.read_seq_string = cfqg_print_rwstat_recursive,
.seq_show = cfqg_print_rwstat_recursive,
},
{
.name = "io_serviced_recursive",
.private = offsetof(struct cfq_group, stats.serviced),
.read_seq_string = cfqg_print_rwstat_recursive,
.seq_show = cfqg_print_rwstat_recursive,
},
{
.name = "io_service_time_recursive",
.private = offsetof(struct cfq_group, stats.service_time),
.read_seq_string = cfqg_print_rwstat_recursive,
.seq_show = cfqg_print_rwstat_recursive,
},
{
.name = "io_wait_time_recursive",
.private = offsetof(struct cfq_group, stats.wait_time),
.read_seq_string = cfqg_print_rwstat_recursive,
.seq_show = cfqg_print_rwstat_recursive,
},
{
.name = "io_merged_recursive",
.private = offsetof(struct cfq_group, stats.merged),
.read_seq_string = cfqg_print_rwstat_recursive,
.seq_show = cfqg_print_rwstat_recursive,
},
{
.name = "io_queued_recursive",
.private = offsetof(struct cfq_group, stats.queued),
.read_seq_string = cfqg_print_rwstat_recursive,
.seq_show = cfqg_print_rwstat_recursive,
},
#ifdef CONFIG_DEBUG_BLK_CGROUP
{
.name = "avg_queue_size",
.read_seq_string = cfqg_print_avg_queue_size,
.seq_show = cfqg_print_avg_queue_size,
},
{
.name = "group_wait_time",
.private = offsetof(struct cfq_group, stats.group_wait_time),
.read_seq_string = cfqg_print_stat,
.seq_show = cfqg_print_stat,
},
{
.name = "idle_time",
.private = offsetof(struct cfq_group, stats.idle_time),
.read_seq_string = cfqg_print_stat,
.seq_show = cfqg_print_stat,
},
{
.name = "empty_time",
.private = offsetof(struct cfq_group, stats.empty_time),
.read_seq_string = cfqg_print_stat,
.seq_show = cfqg_print_stat,
},
{
.name = "dequeue",
.private = offsetof(struct cfq_group, stats.dequeue),
.read_seq_string = cfqg_print_stat,
.seq_show = cfqg_print_stat,
},
{
.name = "unaccounted_time",
.private = offsetof(struct cfq_group, stats.unaccounted_time),
.read_seq_string = cfqg_print_stat,
.seq_show = cfqg_print_stat,
},
#endif /* CONFIG_DEBUG_BLK_CGROUP */
{ } /* terminate */
......
......@@ -163,7 +163,6 @@ static struct cgroup_subsys_state *bcachecg_create(struct cgroup *cgroup)
static void bcachecg_destroy(struct cgroup *cgroup)
{
struct bch_cgroup *cg = cgroup_to_bcache(cgroup);
free_css_id(&bcache_subsys, &cg->css);
kfree(cg);
}
......
......@@ -21,6 +21,7 @@
#include <linux/xattr.h>
#include <linux/fs.h>
#include <linux/percpu-refcount.h>
#include <linux/seq_file.h>
#ifdef CONFIG_CGROUPS
......@@ -28,8 +29,6 @@ struct cgroupfs_root;
struct cgroup_subsys;
struct inode;
struct cgroup;
struct css_id;
struct eventfd_ctx;
extern int cgroup_init_early(void);
extern int cgroup_init(void);
......@@ -79,8 +78,6 @@ struct cgroup_subsys_state {
struct cgroup_subsys_state *parent;
unsigned long flags;
/* ID for this css, if possible */
struct css_id __rcu *id;
/* percpu_ref killing and RCU release */
struct rcu_head rcu_head;
......@@ -239,10 +236,6 @@ struct cgroup {
struct rcu_head rcu_head;
struct work_struct destroy_work;
/* List of events which userspace want to receive */
struct list_head event_list;
spinlock_t event_list_lock;
/* directory xattrs */
struct simple_xattrs xattrs;
};
......@@ -280,6 +273,9 @@ enum {
* - "tasks" is removed. Everything should be at process
* granularity. Use "cgroup.procs" instead.
*
* - "cgroup.procs" is not sorted. pids will be unique unless they
* got recycled inbetween reads.
*
* - "release_agent" and "notify_on_release" are removed.
* Replacement notification mechanism will be implemented.
*
......@@ -320,9 +316,6 @@ struct cgroupfs_root {
/* Unique id for this hierarchy. */
int hierarchy_id;
/* A list running through the attached subsystems */
struct list_head subsys_list;
/* The root cgroup for this hierarchy */
struct cgroup top_cgroup;
......@@ -388,16 +381,6 @@ struct css_set {
struct rcu_head rcu_head;
};
/*
* cgroup_map_cb is an abstract callback API for reporting map-valued
* control files
*/
struct cgroup_map_cb {
int (*fill)(struct cgroup_map_cb *cb, const char *key, u64 value);
void *state;
};
/*
* struct cftype: handler definitions for cgroup control files
*
......@@ -445,10 +428,6 @@ struct cftype {
*/
struct cgroup_subsys *ss;
int (*open)(struct inode *inode, struct file *file);
ssize_t (*read)(struct cgroup_subsys_state *css, struct cftype *cft,
struct file *file,
char __user *buf, size_t nbytes, loff_t *ppos);
/*
* read_u64() is a shortcut for the common case of returning a
* single integer. Use it in place of read()
......@@ -458,24 +437,14 @@ struct cftype {
* read_s64() is a signed version of read_u64()
*/
s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);
/*
* read_map() is used for defining a map of key/value
* pairs. It should call cb->fill(cb, key, value) for each
* entry. The key/value pairs (and their ordering) should not
* change between reboots.
*/
int (*read_map)(struct cgroup_subsys_state *css, struct cftype *cft,
struct cgroup_map_cb *cb);
/*
* read_seq_string() is used for outputting a simple sequence
* using seqfile.
*/
int (*read_seq_string)(struct cgroup_subsys_state *css,
struct cftype *cft, struct seq_file *m);
ssize_t (*write)(struct cgroup_subsys_state *css, struct cftype *cft,
struct file *file,
const char __user *buf, size_t nbytes, loff_t *ppos);
/* generic seq_file read interface */
int (*seq_show)(struct seq_file *sf, void *v);
/* optional ops, implement all or none */
void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
void (*seq_stop)(struct seq_file *sf, void *v);
/*
* write_u64() is a shortcut for the common case of accepting
......@@ -504,27 +473,6 @@ struct cftype {
* kick type for multiplexing.
*/
int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
int (*release)(struct inode *inode, struct file *file);
/*
* register_event() callback will be used to add new userspace
* waiter for changes related to the cftype. Implement it if
* you want to provide this functionality. Use eventfd_signal()
* on eventfd to send notification to userspace.
*/
int (*register_event)(struct cgroup_subsys_state *css,
struct cftype *cft, struct eventfd_ctx *eventfd,
const char *args);
/*
* unregister_event() callback will be called when userspace
* closes the eventfd or on cgroup removing.
* This callback must be implemented, if you want provide
* notification functionality.
*/
void (*unregister_event)(struct cgroup_subsys_state *css,
struct cftype *cft,
struct eventfd_ctx *eventfd);
};
/*
......@@ -537,6 +485,26 @@ struct cftype_set {
struct cftype *cfts;
};
/*
* cgroupfs file entry, pointed to from leaf dentry->d_fsdata. Don't
* access directly.
*/
struct cfent {
struct list_head node;
struct dentry *dentry;
struct cftype *type;
struct cgroup_subsys_state *css;
/* file xattrs */
struct simple_xattrs xattrs;
};
/* seq_file->private points to the following, only ->priv is public */
struct cgroup_open_file {
struct cfent *cfe;
void *priv;
};
/*
* See the comment above CGRP_ROOT_SANE_BEHAVIOR for details. This
* function can be called as long as @cgrp is accessible.
......@@ -552,6 +520,18 @@ static inline const char *cgroup_name(const struct cgroup *cgrp)
return rcu_dereference(cgrp->name)->name;
}
static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq)
{
struct cgroup_open_file *of = seq->private;
return of->cfe->css;
}
static inline struct cftype *seq_cft(struct seq_file *seq)
{
struct cgroup_open_file *of = seq->private;
return of->cfe->type;
}
int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cftype *cfts);
......@@ -631,12 +611,8 @@ struct cgroup_subsys {
#define MAX_CGROUP_TYPE_NAMELEN 32
const char *name;
/*
* Link to parent, and list entry in parent's children.
* Protected by cgroup_lock()
*/
/* link to parent, protected by cgroup_lock() */
struct cgroupfs_root *root;
struct list_head sibling;
/* list of cftype_sets */
struct list_head cftsets;
......
......@@ -7,6 +7,7 @@
#include <linux/gfp.h>
#include <linux/types.h>
#include <linux/cgroup.h>
#include <linux/eventfd.h>
struct vmpressure {
unsigned long scanned;
......@@ -33,13 +34,10 @@ extern void vmpressure_init(struct vmpressure *vmpr);
extern void vmpressure_cleanup(struct vmpressure *vmpr);
extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr);
extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css);
extern int vmpressure_register_event(struct cgroup_subsys_state *css,
struct cftype *cft,
extern int vmpressure_register_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd,
const char *args);
extern void vmpressure_unregister_event(struct cgroup_subsys_state *css,
struct cftype *cft,
extern void vmpressure_unregister_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd);
#else
static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
......
......@@ -854,7 +854,6 @@ config NUMA_BALANCING
menuconfig CGROUPS
boolean "Control Group support"
depends on EVENTFD
help
This option adds support for grouping sets of processes together, for
use with process control subsystems such as Cpusets, CFS, memory
......@@ -921,6 +920,7 @@ config MEMCG
bool "Memory Resource Controller for Control Groups"
depends on RESOURCE_COUNTERS
select MM_OWNER
select EVENTFD
help
Provides a memory resource controller that manages both anonymous
memory and page cache. (See Documentation/cgroups/memory.txt)
......@@ -1160,7 +1160,6 @@ config UIDGID_STRICT_TYPE_CHECKS
config SCHED_AUTOGROUP
bool "Automatic process group scheduling"
select EVENTFD
select CGROUPS
select CGROUP_SCHED
select FAIR_GROUP_SCHED
......
This diff is collapsed.
......@@ -301,10 +301,9 @@ static void update_if_frozen(struct cgroup_subsys_state *css)
spin_unlock_irq(&freezer->lock);
}
static int freezer_read(struct cgroup_subsys_state *css, struct cftype *cft,
struct seq_file *m)
static int freezer_read(struct seq_file *m, void *v)
{
struct cgroup_subsys_state *pos;
struct cgroup_subsys_state *css = seq_css(m), *pos;
rcu_read_lock();
......@@ -458,7 +457,7 @@ static struct cftype files[] = {
{
.name = "state",
.flags = CFTYPE_NOT_ON_ROOT,
.read_seq_string = freezer_read,
.seq_show = freezer_read,
.write_string = freezer_write,
},
{
......
......@@ -1731,66 +1731,41 @@ static int cpuset_write_resmask(struct cgroup_subsys_state *css,
* used, list of ranges of sequential numbers, is variable length,
* and since these maps can change value dynamically, one could read
* gibberish by doing partial reads while a list was changing.
* A single large read to a buffer that crosses a page boundary is
* ok, because the result being copied to user land is not recomputed
* across a page fault.
*/
static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
static int cpuset_common_seq_show(struct seq_file *sf, void *v)
{
size_t count;
mutex_lock(&callback_mutex);
count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
mutex_unlock(&callback_mutex);
struct cpuset *cs = css_cs(seq_css(sf));
cpuset_filetype_t type = seq_cft(sf)->private;
ssize_t count;
char *buf, *s;
int ret = 0;
return count;
}
static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs)
{
size_t count;
count = seq_get_buf(sf, &buf);
s = buf;
mutex_lock(&callback_mutex);
count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed);
mutex_unlock(&callback_mutex);
return count;
}
static ssize_t cpuset_common_file_read(struct cgroup_subsys_state *css,
struct cftype *cft, struct file *file,
char __user *buf, size_t nbytes,
loff_t *ppos)
{
struct cpuset *cs = css_cs(css);
cpuset_filetype_t type = cft->private;
char *page;
ssize_t retval = 0;
char *s;
if (!(page = (char *)__get_free_page(GFP_TEMPORARY)))
return -ENOMEM;
s = page;
switch (type) {
case FILE_CPULIST:
s += cpuset_sprintf_cpulist(s, cs);
s += cpulist_scnprintf(s, count, cs->cpus_allowed);
break;
case FILE_MEMLIST:
s += cpuset_sprintf_memlist(s, cs);
s += nodelist_scnprintf(s, count, cs->mems_allowed);
break;
default:
retval = -EINVAL;
goto out;
ret = -EINVAL;
goto out_unlock;
}
*s++ = '\n';
retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
out:
free_page((unsigned long)page);
return retval;
if (s < buf + count - 1) {
*s++ = '\n';
seq_commit(sf, s - buf);
} else {
seq_commit(sf, -1);
}
out_unlock:
mutex_unlock(&callback_mutex);
return ret;
}
static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
......@@ -1847,7 +1822,7 @@ static s64 cpuset_read_s64(struct cgroup_subsys_state *css, struct cftype *cft)
static struct cftype files[] = {
{
.name = "cpus",
.read = cpuset_common_file_read,
.seq_show = cpuset_common_seq_show,
.write_string = cpuset_write_resmask,
.max_write_len = (100U + 6 * NR_CPUS),
.private = FILE_CPULIST,
......@@ -1855,7 +1830,7 @@ static struct cftype files[] = {
{
.name = "mems",
.read = cpuset_common_file_read,
.seq_show = cpuset_common_seq_show,
.write_string = cpuset_write_resmask,
.max_write_len = (100U + 6 * MAX_NUMNODES),
.private = FILE_MEMLIST,
......
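The cpuset conversion above also shows the direct-buffer idiom
available to seq_show handlers: grab the remaining space in the
seq_file's buffer with seq_get_buf(), format into it, then commit the
bytes used, or commit -1 to signal overflow so seq_file grows the
buffer and calls the handler again.  A condensed sketch of that idiom
(the handler and its payload are hypothetical):

#include <linux/kernel.h>
#include <linux/seq_file.h>

static int example_seq_show(struct seq_file *sf, void *v)
{
	char *buf, *s;
	ssize_t count = seq_get_buf(sf, &buf);	/* space left in buffer */

	s = buf;
	s += scnprintf(s, count, "%s", "some variable-length output");

	if (s < buf + count - 1) {
		*s++ = '\n';
		seq_commit(sf, s - buf);	/* fits: commit bytes used */
	} else {
		seq_commit(sf, -1);		/* overflow: retry bigger */
	}
	return 0;
}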
......@@ -7852,15 +7852,14 @@ static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
return ret;
}
static int cpu_stats_show(struct cgroup_subsys_state *css, struct cftype *cft,
struct cgroup_map_cb *cb)
static int cpu_stats_show(struct seq_file *sf, void *v)
{
struct task_group *tg = css_tg(css);
struct task_group *tg = css_tg(seq_css(sf));
struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
cb->fill(cb, "nr_periods", cfs_b->nr_periods);
cb->fill(cb, "nr_throttled", cfs_b->nr_throttled);
cb->fill(cb, "throttled_time", cfs_b->throttled_time);
seq_printf(sf, "nr_periods %d\n", cfs_b->nr_periods);
seq_printf(sf, "nr_throttled %d\n", cfs_b->nr_throttled);
seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time);
return 0;
}
......@@ -7914,7 +7913,7 @@ static struct cftype cpu_files[] = {
},
{
.name = "stat",
.read_map = cpu_stats_show,
.seq_show = cpu_stats_show,
},
#endif
#ifdef CONFIG_RT_GROUP_SCHED
......
......@@ -163,10 +163,9 @@ static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
return err;
}
static int cpuacct_percpu_seq_read(struct cgroup_subsys_state *css,
struct cftype *cft, struct seq_file *m)
static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
{
struct cpuacct *ca = css_ca(css);
struct cpuacct *ca = css_ca(seq_css(m));
u64 percpu;
int i;
......@@ -183,10 +182,9 @@ static const char * const cpuacct_stat_desc[] = {
[CPUACCT_STAT_SYSTEM] = "system",
};
static int cpuacct_stats_show(struct cgroup_subsys_state *css,
struct cftype *cft, struct cgroup_map_cb *cb)
static int cpuacct_stats_show(struct seq_file *sf, void *v)
{
struct cpuacct *ca = css_ca(css);
struct cpuacct *ca = css_ca(seq_css(sf));
int cpu;
s64 val = 0;
......@@ -196,7 +194,7 @@ static int cpuacct_stats_show(struct cgroup_subsys_state *css,
val += kcpustat->cpustat[CPUTIME_NICE];
}
val = cputime64_to_clock_t(val);
cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val);
seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_USER], val);
val = 0;
for_each_online_cpu(cpu) {
......@@ -207,7 +205,7 @@ static int cpuacct_stats_show(struct cgroup_subsys_state *css,
}
val = cputime64_to_clock_t(val);
cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
seq_printf(sf, "%s %lld\n", cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
return 0;
}
......@@ -220,11 +218,11 @@ static struct cftype files[] = {
},
{
.name = "usage_percpu",
.read_seq_string = cpuacct_percpu_seq_read,
.seq_show = cpuacct_percpu_seq_show,
},
{
.name = "stat",
.read_map = cpuacct_stats_show,
.seq_show = cpuacct_stats_show,
},
{ } /* terminate */
};
......
......@@ -242,22 +242,16 @@ void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
return;
}
static ssize_t hugetlb_cgroup_read(struct cgroup_subsys_state *css,
struct cftype *cft, struct file *file,
char __user *buf, size_t nbytes,
loff_t *ppos)
static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
struct cftype *cft)
{
u64 val;
char str[64];
int idx, name, len;
int idx, name;
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
idx = MEMFILE_IDX(cft->private);
name = MEMFILE_ATTR(cft->private);
val = res_counter_read_u64(&h_cg->hugepage[idx], name);
len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val);
return simple_read_from_buffer(buf, nbytes, ppos, str, len);
return res_counter_read_u64(&h_cg->hugepage[idx], name);
}
static int hugetlb_cgroup_write(struct cgroup_subsys_state *css,
......@@ -337,28 +331,28 @@ static void __init __hugetlb_cgroup_file_init(int idx)
cft = &h->cgroup_files[0];
snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
cft->read = hugetlb_cgroup_read;
cft->read_u64 = hugetlb_cgroup_read_u64;
cft->write_string = hugetlb_cgroup_write;
/* Add the usage file */
cft = &h->cgroup_files[1];
snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
cft->read = hugetlb_cgroup_read;
cft->read_u64 = hugetlb_cgroup_read_u64;
/* Add the MAX usage file */
cft = &h->cgroup_files[2];
snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
cft->trigger = hugetlb_cgroup_reset;
cft->read = hugetlb_cgroup_read;
cft->read_u64 = hugetlb_cgroup_read_u64;
/* Add the failcntfile */
cft = &h->cgroup_files[3];
snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT);
cft->trigger = hugetlb_cgroup_reset;
cft->read = hugetlb_cgroup_read;
cft->read_u64 = hugetlb_cgroup_read_u64;
/* NULL terminate the last cft */
cft = &h->cgroup_files[4];
......
This diff is collapsed.
......@@ -451,7 +451,7 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
* lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry
* @ent: swap entry to be looked up.
*
* Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
* Returns ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
*/
unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
{
......
......@@ -278,8 +278,7 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
/**
* vmpressure_register_event() - Bind vmpressure notifications to an eventfd
* @css: css that is interested in vmpressure notifications
* @cft: cgroup control files handle
* @memcg: memcg that is interested in vmpressure notifications
* @eventfd: eventfd context to link notifications with
* @args: event arguments (used to set up a pressure level threshold)
*
......@@ -289,15 +288,12 @@ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
* threshold (one of vmpressure_str_levels, i.e. "low", "medium", or
* "critical").
*
* This function should not be used directly, just pass it to (struct
* cftype).register_event, and then cgroup core will handle everything by
* itself.
* To be used as memcg event method.
*/
int vmpressure_register_event(struct cgroup_subsys_state *css,
struct cftype *cft, struct eventfd_ctx *eventfd,
const char *args)
int vmpressure_register_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd, const char *args)
{
struct vmpressure *vmpr = css_to_vmpressure(css);
struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
struct vmpressure_event *ev;
int level;
......@@ -325,23 +321,19 @@ int vmpressure_register_event(struct cgroup_subsys_state *css,
/**
* vmpressure_unregister_event() - Unbind eventfd from vmpressure
* @css: css handle
* @cft: cgroup control files handle
* @memcg: memcg handle
* @eventfd: eventfd context that was used to link vmpressure with the @cg
*
* This function does internal manipulations to detach the @eventfd from
* the vmpressure notifications, and then frees internal resources
* associated with the @eventfd (but the @eventfd itself is not freed).
*
* This function should not be used directly, just pass it to (struct
* cftype).unregister_event, and then cgroup core will handle everything
* by itself.
* To be used as memcg event method.
*/
void vmpressure_unregister_event(struct cgroup_subsys_state *css,
struct cftype *cft,
void vmpressure_unregister_event(struct mem_cgroup *memcg,
struct eventfd_ctx *eventfd)
{
struct vmpressure *vmpr = css_to_vmpressure(css);
struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
struct vmpressure_event *ev;
mutex_lock(&vmpr->events_lock);
......
......@@ -173,14 +173,14 @@ static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft)
return css->cgroup->id;
}
static int read_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
struct cgroup_map_cb *cb)
static int read_priomap(struct seq_file *sf, void *v)
{
struct net_device *dev;
rcu_read_lock();
for_each_netdev_rcu(&init_net, dev)
cb->fill(cb, dev->name, netprio_prio(css, dev));
seq_printf(sf, "%s %u\n", dev->name,
netprio_prio(seq_css(sf), dev));
rcu_read_unlock();
return 0;
}
......@@ -238,7 +238,7 @@ static struct cftype ss_files[] = {
},
{
.name = "ifpriomap",
.read_map = read_priomap,
.seq_show = read_priomap,
.write_string = write_priomap,
},
{ } /* terminate */
......
......@@ -274,10 +274,9 @@ static void set_majmin(char *str, unsigned m)
sprintf(str, "%u", m);
}
static int devcgroup_seq_read(struct cgroup_subsys_state *css,
struct cftype *cft, struct seq_file *m)
static int devcgroup_seq_show(struct seq_file *m, void *v)
{
struct dev_cgroup *devcgroup = css_to_devcgroup(css);
struct dev_cgroup *devcgroup = css_to_devcgroup(seq_css(m));
struct dev_exception_item *ex;
char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
......@@ -679,7 +678,7 @@ static struct cftype dev_cgroup_files[] = {
},
{
.name = "list",
.read_seq_string = devcgroup_seq_read,
.seq_show = devcgroup_seq_show,
.private = DEVCG_LIST,
},
{ } /* terminate */
......