Commit 608c1d3c authored by Linus Torvalds

Merge branch 'for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:
 "Several notable changes this cycle:

   - Thread mode was merged. This will be used for cgroup2 support for
     CPU and possibly other controllers. Unfortunately, CPU controller
     cgroup2 support didn't make this pull request but most contentions
     have been resolved and the support is likely to be merged before
     the next merge window.

   - cgroup.stat now shows the number of descendant cgroups.

   - cpuset now can enable the easier-to-configure v2 behavior on v1
     hierarchy"

* 'for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (21 commits)
  cpuset: Allow v2 behavior in v1 cgroup
  cgroup: Add mount flag to enable cpuset to use v2 behavior in v1 cgroup
  cgroup: remove unneeded checks
  cgroup: misc changes
  cgroup: short-circuit cset_cgroup_from_root() on the default hierarchy
  cgroup: re-use the parent pointer in cgroup_destroy_locked()
  cgroup: add cgroup.stat interface with basic hierarchy stats
  cgroup: implement hierarchy limits
  cgroup: keep track of number of descent cgroups
  cgroup: add comment to cgroup_enable_threaded()
  cgroup: remove unnecessary empty check when enabling threaded mode
  cgroup: update debug controller to print out thread mode information
  cgroup: implement cgroup v2 thread support
  cgroup: implement CSS_TASK_ITER_THREADED
  cgroup: introduce cgroup->dom_cgrp and threaded css_set handling
  cgroup: add @flags to css_task_iter_start() and implement CSS_TASK_ITER_PROCS
  cgroup: reorganize cgroup.procs / task write path
  cgroup: replace css_set walking populated test with testing cgrp->nr_populated_csets
  cgroup: distinguish local and children populated states
  cgroup: remove now unused list_head @pending in cgroup_apply_cftypes()
  ...
parents 9954d489 b8d1b8ee
This diff is collapsed.
@@ -74,6 +74,11 @@ enum {
	 * aren't writeable from inside the namespace.
	 */
	CGRP_ROOT_NS_DELEGATE = (1 << 3),
+
+	/*
+	 * Enable cpuset controller in v1 cgroup to use v2 behavior.
+	 */
+	CGRP_ROOT_CPUSET_V2_MODE = (1 << 4),
 };

 /* cftype->flags */
@@ -172,6 +177,14 @@ struct css_set {
	/* reference count */
	refcount_t refcount;

+	/*
+	 * For a domain cgroup, the following points to self. If threaded,
+	 * to the matching cset of the nearest domain ancestor. The
+	 * dom_cset provides access to the domain cgroup and its csses to
+	 * which domain level resource consumptions should be charged.
+	 */
+	struct css_set *dom_cset;
+
	/* the default cgroup associated with this css_set */
	struct cgroup *dfl_cgrp;
@@ -200,6 +213,10 @@ struct css_set {
	 */
	struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];

+	/* all threaded csets whose ->dom_cset points to this cset */
+	struct list_head threaded_csets;
+	struct list_head threaded_csets_node;
+
	/*
	 * List running through all cgroup groups in the same hash
	 * slot. Protected by css_set_lock
@@ -261,13 +278,35 @@ struct cgroup {
	 */
	int level;

+	/* Maximum allowed descent tree depth */
+	int max_depth;
+
+	/*
+	 * Keep track of total numbers of visible and dying descent cgroups.
+	 * Dying cgroups are cgroups which were deleted by a user,
+	 * but are still existing because someone else is holding a reference.
+	 * max_descendants is a maximum allowed number of descent cgroups.
+	 */
+	int nr_descendants;
+	int nr_dying_descendants;
+	int max_descendants;
+
	/*
	 * Each non-empty css_set associated with this cgroup contributes
-	 * one to populated_cnt. All children with non-zero popuplated_cnt
-	 * of their own contribute one. The count is zero iff there's no
-	 * task in this cgroup or its subtree.
+	 * one to nr_populated_csets. The counter is zero iff this cgroup
+	 * doesn't have any tasks.
+	 *
+	 * All children which have non-zero nr_populated_csets and/or
+	 * nr_populated_children of their own contribute one to either
+	 * nr_populated_domain_children or nr_populated_threaded_children
+	 * depending on their type. Each counter is zero iff all cgroups
+	 * of the type in the subtree proper don't have any tasks.
	 */
-	int populated_cnt;
+	int nr_populated_csets;
+	int nr_populated_domain_children;
+	int nr_populated_threaded_children;
+
+	int nr_threaded_children;	/* # of live threaded child cgroups */

	struct kernfs_node *kn;		/* cgroup kernfs entry */
	struct cgroup_file procs_file;	/* handle for "cgroup.procs" */
@@ -305,6 +344,15 @@ struct cgroup {
	 */
	struct list_head e_csets[CGROUP_SUBSYS_COUNT];

+	/*
+	 * If !threaded, self. If threaded, it points to the nearest
+	 * domain ancestor. Inside a threaded subtree, cgroups are exempt
+	 * from process granularity and no-internal-task constraint.
+	 * Domain level resource consumptions which aren't tied to a
+	 * specific task are charged to the dom_cgrp.
+	 */
+	struct cgroup *dom_cgrp;
+
	/*
	 * list of pidlists, up to two for each namespace (one for procs, one
	 * for tasks); created on demand.
@@ -491,6 +539,18 @@ struct cgroup_subsys {
	 */
	bool implicit_on_dfl:1;

+	/*
+	 * If %true, the controller, supports threaded mode on the default
+	 * hierarchy. In a threaded subtree, both process granularity and
+	 * no-internal-process constraint are ignored and a threaded
+	 * controllers should be able to handle that.
+	 *
+	 * Note that as an implicit controller is automatically enabled on
+	 * all cgroups on the default hierarchy, it should also be
+	 * threaded. implicit && !threaded is not supported.
+	 */
+	bool threaded:1;
+
	/*
	 * If %false, this subsystem is properly hierarchical -
	 * configuration, resource accounting and restriction on a parent
...
@@ -36,18 +36,28 @@
 #define CGROUP_WEIGHT_DFL		100
 #define CGROUP_WEIGHT_MAX		10000

+/* walk only threadgroup leaders */
+#define CSS_TASK_ITER_PROCS		(1U << 0)
+/* walk all threaded css_sets in the domain */
+#define CSS_TASK_ITER_THREADED		(1U << 1)
+
 /* a css_task_iter should be treated as an opaque object */
 struct css_task_iter {
	struct cgroup_subsys *ss;
+	unsigned int flags;

	struct list_head *cset_pos;
	struct list_head *cset_head;

+	struct list_head *tcset_pos;
+	struct list_head *tcset_head;
+
	struct list_head *task_pos;
	struct list_head *tasks_head;
	struct list_head *mg_tasks_head;

	struct css_set *cur_cset;
+	struct css_set *cur_dcset;
	struct task_struct *cur_task;
	struct list_head iters_node;	/* css_set->task_iters */
 };
@@ -129,7 +139,7 @@ struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
 struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
					struct cgroup_subsys_state **dst_cssp);

-void css_task_iter_start(struct cgroup_subsys_state *css,
+void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
			 struct css_task_iter *it);
 struct task_struct *css_task_iter_next(struct css_task_iter *it);
 void css_task_iter_end(struct css_task_iter *it);
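
An illustrative sketch (not part of the diff) of the extended iterator API declared above: the new flags argument lets callers pass CSS_TASK_ITER_PROCS to walk only thread-group leaders instead of every task. The helper name below is hypothetical.

/* Illustrative sketch only; example_count_procs() is a made-up helper. */
#include <linux/cgroup.h>

static void example_count_procs(struct cgroup_subsys_state *css)
{
	struct css_task_iter it;
	struct task_struct *task;
	int nr_procs = 0;

	/* walk only thread-group leaders instead of every task */
	css_task_iter_start(css, CSS_TASK_ITER_PROCS, &it);
	while ((task = css_task_iter_next(&it)))
		nr_procs++;
	css_task_iter_end(&it);

	pr_info("%d processes in css\n", nr_procs);
}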
@@ -388,6 +398,16 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
	percpu_ref_put_many(&css->refcnt, n);
 }

+static inline void cgroup_get(struct cgroup *cgrp)
+{
+	css_get(&cgrp->self);
+}
+
+static inline bool cgroup_tryget(struct cgroup *cgrp)
+{
+	return css_tryget(&cgrp->self);
+}
+
 static inline void cgroup_put(struct cgroup *cgrp)
 {
	css_put(&cgrp->self);
@@ -500,6 +520,20 @@ static inline struct cgroup *task_cgroup(struct task_struct *task,
	return task_css(task, subsys_id)->cgroup;
 }

+static inline struct cgroup *task_dfl_cgroup(struct task_struct *task)
+{
+	return task_css_set(task)->dfl_cgrp;
+}
+
+static inline struct cgroup *cgroup_parent(struct cgroup *cgrp)
+{
+	struct cgroup_subsys_state *parent_css = cgrp->self.parent;
+
+	if (parent_css)
+		return container_of(parent_css, struct cgroup, self);
+	return NULL;
+}
+
 /**
  * cgroup_is_descendant - test ancestry
  * @cgrp: the cgroup to be tested
@@ -537,7 +571,8 @@ static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
 /* no synchronization, the result can only be used as a hint */
 static inline bool cgroup_is_populated(struct cgroup *cgrp)
 {
-	return cgrp->populated_cnt;
+	return cgrp->nr_populated_csets + cgrp->nr_populated_domain_children +
+		cgrp->nr_populated_threaded_children;
 }

 /* returns ino associated with a cgroup */
...
@@ -156,6 +156,8 @@ static inline void get_css_set(struct css_set *cset)

 bool cgroup_ssid_enabled(int ssid);
 bool cgroup_on_dfl(const struct cgroup *cgrp);
+bool cgroup_is_thread_root(struct cgroup *cgrp);
+bool cgroup_is_threaded(struct cgroup *cgrp);

 struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root);
 struct cgroup *task_cgroup_from_root(struct task_struct *task,
@@ -173,7 +175,7 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
			       struct cgroup_root *root, unsigned long magic,
			       struct cgroup_namespace *ns);

-bool cgroup_may_migrate_to(struct cgroup *dst_cgrp);
+int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp);
 void cgroup_migrate_finish(struct cgroup_mgctx *mgctx);
 void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp,
			    struct cgroup_mgctx *mgctx);
@@ -183,10 +185,10 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup,
 int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
		       bool threadgroup);

-ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
-			     size_t nbytes, loff_t off, bool threadgroup);
-ssize_t cgroup_procs_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
-			   loff_t off);
+struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup)
+	__acquires(&cgroup_threadgroup_rwsem);
+void cgroup_procs_write_finish(struct task_struct *task)
+	__releases(&cgroup_threadgroup_rwsem);

 void cgroup_lock_and_drain_offline(struct cgroup *cgrp);
...
@@ -99,8 +99,9 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
	if (cgroup_on_dfl(to))
		return -EINVAL;

-	if (!cgroup_may_migrate_to(to))
-		return -EBUSY;
+	ret = cgroup_migrate_vet_dst(to);
+	if (ret)
+		return ret;

	mutex_lock(&cgroup_mutex);
@@ -121,7 +122,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
	 * ->can_attach() fails.
	 */
	do {
-		css_task_iter_start(&from->self, &it);
+		css_task_iter_start(&from->self, 0, &it);
		task = css_task_iter_next(&it);
		if (task)
			get_task_struct(task);
@@ -373,7 +374,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
	if (!array)
		return -ENOMEM;
	/* now, populate the array */
-	css_task_iter_start(&cgrp->self, &it);
+	css_task_iter_start(&cgrp->self, 0, &it);
	while ((tsk = css_task_iter_next(&it))) {
		if (unlikely(n == length))
			break;
@@ -510,10 +511,58 @@ static int cgroup_pidlist_show(struct seq_file *s, void *v)
	return 0;
 }

-static ssize_t cgroup_tasks_write(struct kernfs_open_file *of,
-				  char *buf, size_t nbytes, loff_t off)
+static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
+				     char *buf, size_t nbytes, loff_t off,
+				     bool threadgroup)
 {
-	return __cgroup_procs_write(of, buf, nbytes, off, false);
+	struct cgroup *cgrp;
+	struct task_struct *task;
+	const struct cred *cred, *tcred;
+	ssize_t ret;
+
+	cgrp = cgroup_kn_lock_live(of->kn, false);
+	if (!cgrp)
+		return -ENODEV;
+
+	task = cgroup_procs_write_start(buf, threadgroup);
+	ret = PTR_ERR_OR_ZERO(task);
+	if (ret)
+		goto out_unlock;
+
+	/*
+	 * Even if we're attaching all tasks in the thread group, we only
+	 * need to check permissions on one of them.
+	 */
+	cred = current_cred();
+	tcred = get_task_cred(task);
+	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
+	    !uid_eq(cred->euid, tcred->uid) &&
+	    !uid_eq(cred->euid, tcred->suid))
+		ret = -EACCES;
+	put_cred(tcred);
+	if (ret)
+		goto out_finish;
+
+	ret = cgroup_attach_task(cgrp, task, threadgroup);
+
+out_finish:
+	cgroup_procs_write_finish(task);
+out_unlock:
+	cgroup_kn_unlock(of->kn);
+
+	return ret ?: nbytes;
+}
+
+static ssize_t cgroup1_procs_write(struct kernfs_open_file *of,
+				   char *buf, size_t nbytes, loff_t off)
+{
+	return __cgroup1_procs_write(of, buf, nbytes, off, true);
+}
+
+static ssize_t cgroup1_tasks_write(struct kernfs_open_file *of,
+				   char *buf, size_t nbytes, loff_t off)
+{
+	return __cgroup1_procs_write(of, buf, nbytes, off, false);
 }

 static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
@@ -592,7 +641,7 @@ struct cftype cgroup1_base_files[] = {
		.seq_stop = cgroup_pidlist_stop,
		.seq_show = cgroup_pidlist_show,
		.private = CGROUP_FILE_PROCS,
-		.write = cgroup_procs_write,
+		.write = cgroup1_procs_write,
	},
	{
		.name = "cgroup.clone_children",
@@ -611,7 +660,7 @@ struct cftype cgroup1_base_files[] = {
		.seq_stop = cgroup_pidlist_stop,
		.seq_show = cgroup_pidlist_show,
		.private = CGROUP_FILE_TASKS,
-		.write = cgroup_tasks_write,
+		.write = cgroup1_tasks_write,
	},
	{
		.name = "notify_on_release",
@@ -701,7 +750,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
	}
	rcu_read_unlock();

-	css_task_iter_start(&cgrp->self, &it);
+	css_task_iter_start(&cgrp->self, 0, &it);
	while ((tsk = css_task_iter_next(&it))) {
		switch (tsk->state) {
		case TASK_RUNNING:
@@ -846,6 +895,8 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
		seq_puts(seq, ",noprefix");
	if (root->flags & CGRP_ROOT_XATTR)
		seq_puts(seq, ",xattr");
+	if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
+		seq_puts(seq, ",cpuset_v2_mode");

	spin_lock(&release_agent_path_lock);
	if (strlen(root->release_agent_path))
@@ -900,6 +951,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
			opts->cpuset_clone_children = true;
			continue;
		}
+		if (!strcmp(token, "cpuset_v2_mode")) {
+			opts->flags |= CGRP_ROOT_CPUSET_V2_MODE;
+			continue;
+		}
		if (!strcmp(token, "xattr")) {
			opts->flags |= CGRP_ROOT_XATTR;
			continue;
		}
...
This diff is collapsed.
@@ -300,6 +300,16 @@ static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);

 static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq);

+/*
+ * Cgroup v2 behavior is used when on default hierarchy or the
+ * cgroup_v2_mode flag is set.
+ */
+static inline bool is_in_v2_mode(void)
+{
+	return cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
+	      (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE);
+}
+
 /*
  * This is ugly, but preserves the userspace API for existing cpuset
  * users. If someone tries to mount the "cpuset" filesystem, we
@@ -490,8 +500,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)

	/* On legacy hiearchy, we must be a subset of our parent cpuset. */
	ret = -EACCES;
-	if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
-	    !is_cpuset_subset(trial, par))
+	if (!is_in_v2_mode() && !is_cpuset_subset(trial, par))
		goto out;

	/*
@@ -870,7 +879,7 @@ static void update_tasks_cpumask(struct cpuset *cs)
	struct css_task_iter it;
	struct task_struct *task;

-	css_task_iter_start(&cs->css, &it);
+	css_task_iter_start(&cs->css, 0, &it);
	while ((task = css_task_iter_next(&it)))
		set_cpus_allowed_ptr(task, cs->effective_cpus);
	css_task_iter_end(&it);
@@ -904,8 +913,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
		 * If it becomes empty, inherit the effective mask of the
		 * parent, which is guaranteed to have some CPUs.
		 */
-		if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
-		    cpumask_empty(new_cpus))
+		if (is_in_v2_mode() && cpumask_empty(new_cpus))
			cpumask_copy(new_cpus, parent->effective_cpus);

		/* Skip the whole subtree if the cpumask remains the same. */
@@ -922,7 +930,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
		cpumask_copy(cp->effective_cpus, new_cpus);
		spin_unlock_irq(&callback_lock);

-		WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+		WARN_ON(!is_in_v2_mode() &&
			!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));

		update_tasks_cpumask(cp);
@@ -1100,7 +1108,7 @@ static void update_tasks_nodemask(struct cpuset *cs)
	 * It's ok if we rebind the same mm twice; mpol_rebind_mm()
	 * is idempotent. Also migrate pages in each mm to new nodes.
	 */
-	css_task_iter_start(&cs->css, &it);
+	css_task_iter_start(&cs->css, 0, &it);
	while ((task = css_task_iter_next(&it))) {
		struct mm_struct *mm;
		bool migrate;
@@ -1158,8 +1166,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
		 * If it becomes empty, inherit the effective mask of the
		 * parent, which is guaranteed to have some MEMs.
		 */
-		if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
-		    nodes_empty(*new_mems))
+		if (is_in_v2_mode() && nodes_empty(*new_mems))
			*new_mems = parent->effective_mems;

		/* Skip the whole subtree if the nodemask remains the same. */
@@ -1176,7 +1183,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
		cp->effective_mems = *new_mems;
		spin_unlock_irq(&callback_lock);

-		WARN_ON(!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+		WARN_ON(!is_in_v2_mode() &&
			!nodes_equal(cp->mems_allowed, cp->effective_mems));

		update_tasks_nodemask(cp);
@@ -1293,7 +1300,7 @@ static void update_tasks_flags(struct cpuset *cs)
	struct css_task_iter it;
	struct task_struct *task;

-	css_task_iter_start(&cs->css, &it);
+	css_task_iter_start(&cs->css, 0, &it);
	while ((task = css_task_iter_next(&it)))
		cpuset_update_task_spread_flag(cs, task);
	css_task_iter_end(&it);
@@ -1468,7 +1475,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)

	/* allow moving tasks into an empty cpuset if on default hierarchy */
	ret = -ENOSPC;
-	if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
+	if (!is_in_v2_mode() &&
	    (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
		goto out_unlock;
@@ -1987,7 +1994,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
	cpuset_inc();

	spin_lock_irq(&callback_lock);
-	if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
+	if (is_in_v2_mode()) {
		cpumask_copy(cs->effective_cpus, parent->effective_cpus);
		cs->effective_mems = parent->effective_mems;
	}
@@ -2064,7 +2071,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
	mutex_lock(&cpuset_mutex);
	spin_lock_irq(&callback_lock);

-	if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
+	if (is_in_v2_mode()) {
		cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
		top_cpuset.mems_allowed = node_possible_map;
	} else {
@@ -2258,7 +2265,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs)
	cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
	mems_updated = !nodes_equal(new_mems, cs->effective_mems);

-	if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
+	if (is_in_v2_mode())
		hotplug_update_tasks(cs, &new_cpus, &new_mems,
				     cpus_updated, mems_updated);
	else
@@ -2289,7 +2296,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
	static cpumask_t new_cpus;
	static nodemask_t new_mems;
	bool cpus_updated, mems_updated;
-	bool on_dfl = cgroup_subsys_on_dfl(cpuset_cgrp_subsys);
+	bool on_dfl = is_in_v2_mode();

	mutex_lock(&cpuset_mutex);
...
@@ -114,27 +114,49 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
 {
	struct cgroup_subsys_state *css = seq_css(seq);
	struct cgrp_cset_link *link;
-	int dead_cnt = 0, extra_refs = 0;
+	int dead_cnt = 0, extra_refs = 0, threaded_csets = 0;

	spin_lock_irq(&css_set_lock);

	list_for_each_entry(link, &css->cgroup->cset_links, cset_link) {
		struct css_set *cset = link->cset;
		struct task_struct *task;
		int count = 0;
		int refcnt = refcount_read(&cset->refcount);

-		seq_printf(seq, " %d", refcnt);
-		if (refcnt - cset->nr_tasks > 0) {
-			int extra = refcnt - cset->nr_tasks;
-
-			seq_printf(seq, " +%d", extra);
-			/*
-			 * Take out the one additional reference in
-			 * init_css_set.
-			 */
-			if (cset == &init_css_set)
-				extra--;
-			extra_refs += extra;
+		/*
+		 * Print out the proc_cset and threaded_cset relationship
+		 * and highlight difference between refcount and task_count.
+		 */
+		seq_printf(seq, "css_set %pK", cset);
+		if (rcu_dereference_protected(cset->dom_cset, 1) != cset) {
+			threaded_csets++;
+			seq_printf(seq, "=>%pK", cset->dom_cset);
+		}
+		if (!list_empty(&cset->threaded_csets)) {
+			struct css_set *tcset;
+			int idx = 0;
+
+			list_for_each_entry(tcset, &cset->threaded_csets,
+					    threaded_csets_node) {
+				seq_puts(seq, idx ? "," : "<=");
+				seq_printf(seq, "%pK", tcset);
+				idx++;
+			}
+		} else {
+			seq_printf(seq, " %d", refcnt);
+			if (refcnt - cset->nr_tasks > 0) {
+				int extra = refcnt - cset->nr_tasks;
+
+				seq_printf(seq, " +%d", extra);
+				/*
+				 * Take out the one additional reference in
+				 * init_css_set.
+				 */
+				if (cset == &init_css_set)
+					extra--;
+				extra_refs += extra;
+			}
		}
		seq_puts(seq, "\n");
@@ -163,10 +185,12 @@ static int cgroup_css_links_read(struct seq_file *seq, void *v)
	}
	spin_unlock_irq(&css_set_lock);

-	if (!dead_cnt && !extra_refs)
+	if (!dead_cnt && !extra_refs && !threaded_csets)
		return 0;

	seq_puts(seq, "\n");
+	if (threaded_csets)
+		seq_printf(seq, "threaded css_sets = %d\n", threaded_csets);
	if (extra_refs)
		seq_printf(seq, "extra references = %d\n", extra_refs);
	if (dead_cnt)
@@ -352,6 +376,7 @@ static int __init enable_cgroup_debug(char *str)
 {
	debug_cgrp_subsys.dfl_cftypes = debug_files;
	debug_cgrp_subsys.implicit_on_dfl = true;
+	debug_cgrp_subsys.threaded = true;
	return 1;
 }
 __setup("cgroup_debug", enable_cgroup_debug);
@@ -268,7 +268,7 @@ static void update_if_frozen(struct cgroup_subsys_state *css)
	rcu_read_unlock();

	/* are all tasks frozen? */
-	css_task_iter_start(css, &it);
+	css_task_iter_start(css, 0, &it);
	while ((task = css_task_iter_next(&it))) {
		if (freezing(task)) {
@@ -320,7 +320,7 @@ static void freeze_cgroup(struct freezer *freezer)
	struct css_task_iter it;
	struct task_struct *task;

-	css_task_iter_start(&freezer->css, &it);
+	css_task_iter_start(&freezer->css, 0, &it);
	while ((task = css_task_iter_next(&it)))
		freeze_task(task);
	css_task_iter_end(&it);
@@ -331,7 +331,7 @@ static void unfreeze_cgroup(struct freezer *freezer)
	struct css_task_iter it;
	struct task_struct *task;

-	css_task_iter_start(&freezer->css, &it);
+	css_task_iter_start(&freezer->css, 0, &it);
	while ((task = css_task_iter_next(&it)))
		__thaw_task(task);
	css_task_iter_end(&it);
...
@@ -345,4 +345,5 @@ struct cgroup_subsys pids_cgrp_subsys = {
	.free		= pids_free,
	.legacy_cftypes	= pids_files,
	.dfl_cftypes	= pids_files,
+	.threaded	= true,
 };
@@ -11293,5 +11293,6 @@ struct cgroup_subsys perf_event_cgrp_subsys = {
	 * controller is not mounted on a legacy hierarchy.
	 */
	.implicit_on_dfl = true,
+	.threaded	= true,
 };
 #endif /* CONFIG_CGROUP_PERF */
@@ -919,7 +919,7 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
		struct css_task_iter it;
		struct task_struct *task;

-		css_task_iter_start(&iter->css, &it);
+		css_task_iter_start(&iter->css, 0, &it);
		while (!ret && (task = css_task_iter_next(&it)))
			ret = fn(task, arg);
		css_task_iter_end(&it);
...
@@ -100,7 +100,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
	cs->classid = (u32)value;

-	css_task_iter_start(css, &it);
+	css_task_iter_start(css, 0, &it);
	while ((p = css_task_iter_next(&it))) {
		task_lock(p);
		iterate_fd(p->files, 0, update_classid_sock,
...