Commit 69dc8010 authored by Linus Torvalds

Merge branch 'for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:
 "Two cpuset behavior changes:

   - cpuset on cgroup2 is changed to enable memory migration based on
     nodemask by default.

   - A notification is generated when cpuset partition state changes.

  All other patches are minor fixes and cleanups"

* 'for-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: Avoid compiler warnings with no subsystems
  cgroup/cpuset: Avoid memory migration when nodemasks match
  cgroup/cpuset: Enable memory migration for cpuset v2
  cgroup/cpuset: Enable event notification when partition state changes
  cgroup: cgroup-v1: clean up kernel-doc notation
  cgroup: Replace deprecated CPU-hotplug functions.
  cgroup/cpuset: Fix violation of cpuset locking rule
  cgroup/cpuset: Fix a partition bug with hotplug
  cgroup/cpuset: Miscellaneous code cleanup
  cgroup: remove cgroup_mount from comments
parents 81b0b29b d20d30eb
@@ -2056,6 +2056,17 @@ Cpuset Interface Files

 	The value of "cpuset.mems" stays constant until the next update
 	and won't be affected by any memory nodes hotplug events.

+	Setting a non-empty value to "cpuset.mems" causes memory of
+	tasks within the cgroup to be migrated to the designated nodes if
+	they are currently using memory outside of the designated nodes.
+
+	There is a cost for this memory migration. The migration
+	may not be complete and some memory pages may be left behind.
+	So it is recommended that "cpuset.mems" should be set properly
+	before spawning new tasks into the cpuset. Even if there is
+	a need to change "cpuset.mems" with active tasks, it shouldn't
+	be done frequently.
+
   cpuset.mems.effective
 	A read-only multiple values file which exists on all
 	cpuset-enabled cgroups.

...
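In practice the documentation above boils down to: write "cpuset.mems" before any task enters the cgroup, so no pages have to be migrated later. A self-contained userspace sketch; the cgroup path and node number are assumptions of this example, and the cpuset controller must already be enabled in the parent's "cgroup.subtree_control":

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical cgroup; create it first, e.g. mkdir /sys/fs/cgroup/demo */
#define CG "/sys/fs/cgroup/demo"

static void write_file(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, val, strlen(val)) < 0) {
		perror(path);
		exit(1);
	}
	close(fd);
}

int main(void)
{
	char pid[32];

	/* Pin memory to node 0 *before* any task joins the cgroup. */
	write_file(CG "/cpuset.mems", "0");

	/* Only now move ourselves in; children inherit the constraint. */
	snprintf(pid, sizeof(pid), "%d", getpid());
	write_file(CG "/cgroup.procs", pid);

	if (fork() == 0)
		execlp("sleep", "sleep", "60", (char *)NULL);
	return 0;
}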
@@ -50,6 +50,8 @@ bool cgroup1_ssid_disabled(int ssid)
  * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
  * @from: attach to all cgroups of a given task
  * @tsk: the task to be attached
+ *
+ * Return: %0 on success or a negative errno code on failure
  */
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 {
@@ -80,7 +82,7 @@ int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
 EXPORT_SYMBOL_GPL(cgroup_attach_task_all);

 /**
- * cgroup_trasnsfer_tasks - move tasks from one cgroup to another
+ * cgroup_transfer_tasks - move tasks from one cgroup to another
  * @to: cgroup to which the tasks will be moved
  * @from: cgroup in which the tasks currently reside
  *
@@ -89,6 +91,8 @@ EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
  * is guaranteed to be either visible in the source cgroup after the
  * parent's migration is complete or put into the target cgroup. No task
  * can slip out of migration through forking.
+ *
+ * Return: %0 on success or a negative errno code on failure
  */
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
 {
@@ -682,6 +686,8 @@ int proc_cgroupstats_show(struct seq_file *m, void *v)
  *
  * Build and fill cgroupstats so that taskstats can export it to user
  * space.
+ *
+ * Return: %0 on success or a negative errno code on failure
  */
 int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
 {

...
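The "Return:" sections added above follow standard kernel-doc notation, where "%0" marks a constant value. On a hypothetical function the complete shape is (illustrative only, not part of the patch):

/**
 * demo_transfer_one - hypothetical helper showing kernel-doc layout
 * @src: cgroup the task currently resides in
 * @dst: cgroup the task should be moved to
 *
 * Return: %0 on success or a negative errno code on failure
 */
int demo_transfer_one(struct cgroup *src, struct cgroup *dst);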
@@ -67,6 +67,14 @@

 /* let's not notify more than 100 times per second */
 #define CGROUP_FILE_NOTIFY_MIN_INTV	DIV_ROUND_UP(HZ, 100)

+/*
+ * To avoid confusing the compiler (and generating warnings) with code
+ * that attempts to access what would be a 0-element array (i.e. sized
+ * to a potentially empty array when CGROUP_SUBSYS_COUNT == 0), this
+ * constant expression can be added.
+ */
+#define CGROUP_HAS_SUBSYS_CONFIG	(CGROUP_SUBSYS_COUNT > 0)
+
 /*
  * cgroup_mutex is the master lock. Any modification to cgroup or its
  * hierarchy must be performed while holding it.
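As an aside on the notification rate limit just above: DIV_ROUND_UP(HZ, 100) rounds up, so the minimum interval never truncates to zero jiffies even for small HZ. A quick userspace check of the arithmetic, re-declaring the macro with the kernel's definition:

#include <stdio.h>

/* Same rounding idiom as the kernel's DIV_ROUND_UP. */
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	/* HZ is a build-time choice; these are common configurations. */
	const int hz_values[] = { 100, 250, 300, 1000 };

	for (int i = 0; i < 4; i++)
		printf("HZ=%4d -> at least %d jiffies between notifications\n",
		       hz_values[i], DIV_ROUND_UP(hz_values[i], 100));
	return 0;
}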
@@ -248,7 +256,7 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css,
  */
 bool cgroup_ssid_enabled(int ssid)
 {
-	if (CGROUP_SUBSYS_COUNT == 0)
+	if (!CGROUP_HAS_SUBSYS_CONFIG)
 		return false;

 	return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
@@ -472,7 +480,7 @@ static u16 cgroup_ss_mask(struct cgroup *cgrp)
 static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
 					      struct cgroup_subsys *ss)
 {
-	if (ss)
+	if (CGROUP_HAS_SUBSYS_CONFIG && ss)
 		return rcu_dereference_check(cgrp->subsys[ss->id],
 					lockdep_is_held(&cgroup_mutex));
 	else
@@ -550,6 +558,9 @@ struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
 {
 	struct cgroup_subsys_state *css;

+	if (!CGROUP_HAS_SUBSYS_CONFIG)
+		return NULL;
+
 	do {
 		css = cgroup_css(cgrp, ss);
@@ -577,6 +588,9 @@ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
 {
 	struct cgroup_subsys_state *css;

+	if (!CGROUP_HAS_SUBSYS_CONFIG)
+		return NULL;
+
 	rcu_read_lock();

 	do {
@@ -647,7 +661,7 @@ struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
 	 * the matching css from the cgroup's subsys table is guaranteed to
 	 * be and stay valid until the enclosing operation is complete.
 	 */
-	if (cft->ss)
+	if (CGROUP_HAS_SUBSYS_CONFIG && cft->ss)
 		return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
 	else
 		return &cgrp->self;
@@ -695,7 +709,7 @@ EXPORT_SYMBOL_GPL(of_css);
  */
 #define do_each_subsys_mask(ss, ssid, ss_mask) do {			\
 	unsigned long __ss_mask = (ss_mask);				\
-	if (!CGROUP_SUBSYS_COUNT) { /* to avoid spurious gcc warning */ \
+	if (!CGROUP_HAS_SUBSYS_CONFIG) {				\
 		(ssid) = 0;						\
 		break;							\
 	}								\
@@ -2169,7 +2183,6 @@ static void cgroup_kill_sb(struct super_block *sb)
 	/*
 	 * If @root doesn't have any children, start killing it.
 	 * This prevents new mounts by disabling percpu_ref_tryget_live().
-	 * cgroup_mount() may wait for @root's release.
 	 *
 	 * And don't kill the default root.
 	 */
@@ -2373,7 +2386,7 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
 	struct css_set *cset = tset->cur_cset;
 	struct task_struct *task = tset->cur_task;

-	while (&cset->mg_node != tset->csets) {
+	while (CGROUP_HAS_SUBSYS_CONFIG && &cset->mg_node != tset->csets) {
 		if (!task)
 			task = list_first_entry(&cset->mg_tasks,
 						struct task_struct, cg_list);
@@ -4644,7 +4657,7 @@ void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
 	it->ss = css->ss;
 	it->flags = flags;

-	if (it->ss)
+	if (CGROUP_HAS_SUBSYS_CONFIG && it->ss)
 		it->cset_pos = &css->cgroup->e_csets[css->ss->id];
 	else
 		it->cset_pos = &css->cgroup->cset_links;

...
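The CGROUP_HAS_SUBSYS_CONFIG conversions above all follow one pattern: a compile-time constant guard lets the compiler prove the array access dead when no subsystems are configured, silencing array-bounds warnings at no runtime cost. A minimal userspace sketch of the same idea; the names here are illustrative, not kernel API:

#include <stdio.h>

#define NSUBSYS 0                /* imagine CGROUP_SUBSYS_COUNT == 0 */
#define HAS_SUBSYS (NSUBSYS > 0) /* counterpart of CGROUP_HAS_SUBSYS_CONFIG */

/* ISO C forbids zero-length arrays, so pad to at least one element. */
static const char *subsys_names[NSUBSYS > 0 ? NSUBSYS : 1];

static const char *subsys_name(int id)
{
	/*
	 * Without the HAS_SUBSYS guard, the indexing expression below is
	 * what newer compilers flag as out-of-bounds even though it can
	 * never execute; the constant guard makes it provably dead code.
	 */
	if (HAS_SUBSYS && id >= 0 && id < NSUBSYS)
		return subsys_names[id];
	return NULL;
}

int main(void)
{
	const char *name = subsys_name(0);

	printf("%s\n", name ? name : "(no subsystems configured)");
	return 0;
}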
@@ -160,6 +160,9 @@ struct cpuset {
 	 */
 	int use_parent_ecpus;
 	int child_ecpus_count;
+
+	/* Handle for cpuset.cpus.partition */
+	struct cgroup_file partition_file;
 };

 /*
@@ -263,6 +266,16 @@ static inline int is_partition_root(const struct cpuset *cs)
 	return cs->partition_root_state > 0;
 }

+/*
+ * Send notification event of whenever partition_root_state changes.
+ */
+static inline void notify_partition_change(struct cpuset *cs,
+					   int old_prs, int new_prs)
+{
+	if (old_prs != new_prs)
+		cgroup_file_notify(&cs->partition_file);
+}
+
 static struct cpuset top_cpuset = {
 	.flags = ((1 << CS_ONLINE) | (1 << CS_CPU_EXCLUSIVE) |
 		  (1 << CS_MEM_EXCLUSIVE)),
@@ -992,7 +1005,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 * 'cpus' is removed, then call this routine to rebuild the
 * scheduler's dynamic sched domains.
 *
- * Call with cpuset_mutex held. Takes get_online_cpus().
+ * Call with cpuset_mutex held. Takes cpus_read_lock().
 */
 static void rebuild_sched_domains_locked(void)
 {
@@ -1053,11 +1066,11 @@ static void rebuild_sched_domains_locked(void)
 void rebuild_sched_domains(void)
 {
-	get_online_cpus();
+	cpus_read_lock();
 	percpu_down_write(&cpuset_rwsem);
 	rebuild_sched_domains_locked();
 	percpu_up_write(&cpuset_rwsem);
-	put_online_cpus();
+	cpus_read_unlock();
 }

 /**
@@ -1127,7 +1140,7 @@ enum subparts_cmd {
 * cpus_allowed can be granted or an error code will be returned.
 *
 * For partcmd_disable, the cpuset is being transofrmed from a partition
- * root back to a non-partition root. any CPUs in cpus_allowed that are in
+ * root back to a non-partition root. Any CPUs in cpus_allowed that are in
 * parent's subparts_cpus will be taken away from that cpumask and put back
 * into parent's effective_cpus. 0 should always be returned.
 *
@@ -1161,6 +1174,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
 	struct cpuset *parent = parent_cs(cpuset);
 	int adding;	/* Moving cpus from effective_cpus to subparts_cpus */
 	int deleting;	/* Moving cpus from subparts_cpus to effective_cpus */
+	int old_prs, new_prs;
 	bool part_error = false;	/* Partition error? */

 	percpu_rwsem_assert_held(&cpuset_rwsem);
@@ -1196,6 +1210,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
 	 * A cpumask update cannot make parent's effective_cpus become empty.
 	 */
 	adding = deleting = false;
+	old_prs = new_prs = cpuset->partition_root_state;
 	if (cmd == partcmd_enable) {
 		cpumask_copy(tmp->addmask, cpuset->cpus_allowed);
 		adding = true;
@@ -1238,7 +1253,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
 		/*
 		 * partcmd_update w/o newmask:
 		 *
-		 * addmask = cpus_allowed & parent->effectiveb_cpus
+		 * addmask = cpus_allowed & parent->effective_cpus
 		 *
 		 * Note that parent's subparts_cpus may have been
 		 * pre-shrunk in case there is a change in the cpu list.
@@ -1260,11 +1275,11 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
 		switch (cpuset->partition_root_state) {
 		case PRS_ENABLED:
 			if (part_error)
-				cpuset->partition_root_state = PRS_ERROR;
+				new_prs = PRS_ERROR;
 			break;
 		case PRS_ERROR:
 			if (!part_error)
-				cpuset->partition_root_state = PRS_ENABLED;
+				new_prs = PRS_ENABLED;
 			break;
 		}
 		/*
@@ -1273,10 +1288,10 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
 		part_error = (prev_prs == PRS_ERROR);
 	}

-	if (!part_error && (cpuset->partition_root_state == PRS_ERROR))
+	if (!part_error && (new_prs == PRS_ERROR))
 		return 0;	/* Nothing need to be done */

-	if (cpuset->partition_root_state == PRS_ERROR) {
+	if (new_prs == PRS_ERROR) {
 		/*
 		 * Remove all its cpus from parent's subparts_cpus.
 		 */
@@ -1285,7 +1300,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
 					       parent->subparts_cpus);
 	}

-	if (!adding && !deleting)
+	if (!adding && !deleting && (new_prs == old_prs))
 		return 0;
@@ -1312,7 +1327,12 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
 	}

 	parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus);
+
+	if (old_prs != new_prs)
+		cpuset->partition_root_state = new_prs;
+
 	spin_unlock_irq(&callback_lock);
+	notify_partition_change(cpuset, old_prs, new_prs);

 	return cmd == partcmd_update;
 }
@@ -1334,6 +1354,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
 	struct cpuset *cp;
 	struct cgroup_subsys_state *pos_css;
 	bool need_rebuild_sched_domains = false;
+	int old_prs, new_prs;

 	rcu_read_lock();
 	cpuset_for_each_descendant_pre(cp, pos_css, cs) {
@@ -1373,17 +1394,18 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
 		 * update_tasks_cpumask() again for tasks in the parent
 		 * cpuset if the parent's subparts_cpus changes.
 		 */
-		if ((cp != cs) && cp->partition_root_state) {
+		old_prs = new_prs = cp->partition_root_state;
+		if ((cp != cs) && old_prs) {
 			switch (parent->partition_root_state) {
 			case PRS_DISABLED:
 				/*
 				 * If parent is not a partition root or an
-				 * invalid partition root, clear the state
-				 * state and the CS_CPU_EXCLUSIVE flag.
+				 * invalid partition root, clear its state
+				 * and its CS_CPU_EXCLUSIVE flag.
 				 */
 				WARN_ON_ONCE(cp->partition_root_state
 					     != PRS_ERROR);
-				cp->partition_root_state = 0;
+				new_prs = PRS_DISABLED;

 				/*
 				 * clear_bit() is an atomic operation and
@@ -1404,11 +1426,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
 				/*
 				 * When parent is invalid, it has to be too.
 				 */
-				cp->partition_root_state = PRS_ERROR;
-				if (cp->nr_subparts_cpus) {
-					cp->nr_subparts_cpus = 0;
-					cpumask_clear(cp->subparts_cpus);
-				}
+				new_prs = PRS_ERROR;
 				break;
 			}
 		}
@@ -1420,8 +1438,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
 		spin_lock_irq(&callback_lock);

 		cpumask_copy(cp->effective_cpus, tmp->new_cpus);
-		if (cp->nr_subparts_cpus &&
-		    (cp->partition_root_state != PRS_ENABLED)) {
+		if (cp->nr_subparts_cpus && (new_prs != PRS_ENABLED)) {
 			cp->nr_subparts_cpus = 0;
 			cpumask_clear(cp->subparts_cpus);
 		} else if (cp->nr_subparts_cpus) {
@@ -1448,7 +1465,12 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
 					= cpumask_weight(cp->subparts_cpus);
 			}
 		}
+
+		if (new_prs != old_prs)
+			cp->partition_root_state = new_prs;
+
 		spin_unlock_irq(&callback_lock);
+		notify_partition_change(cp, old_prs, new_prs);

 		WARN_ON(!is_in_v2_mode() &&
 			!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
@@ -1625,6 +1647,11 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
 {
 	struct cpuset_migrate_mm_work *mwork;

+	if (nodes_equal(*from, *to)) {
+		mmput(mm);
+		return;
+	}
+
 	mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
 	if (mwork) {
 		mwork->mm = mm;
@@ -1951,33 +1978,31 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,

 /*
 * update_prstate - update partititon_root_state
 * cs: the cpuset to update
- * val: 0 - disabled, 1 - enabled
+ * new_prs: new partition root state
 *
 * Call with cpuset_mutex held.
 */
-static int update_prstate(struct cpuset *cs, int val)
+static int update_prstate(struct cpuset *cs, int new_prs)
 {
-	int err;
+	int err, old_prs = cs->partition_root_state;
 	struct cpuset *parent = parent_cs(cs);
-	struct tmpmasks tmp;
+	struct tmpmasks tmpmask;

-	if ((val != 0) && (val != 1))
-		return -EINVAL;
-
-	if (val == cs->partition_root_state)
+	if (old_prs == new_prs)
 		return 0;

 	/*
 	 * Cannot force a partial or invalid partition root to a full
 	 * partition root.
 	 */
-	if (val && cs->partition_root_state)
+	if (new_prs && (old_prs == PRS_ERROR))
 		return -EINVAL;

-	if (alloc_cpumasks(NULL, &tmp))
+	if (alloc_cpumasks(NULL, &tmpmask))
 		return -ENOMEM;

 	err = -EINVAL;
-	if (!cs->partition_root_state) {
+	if (!old_prs) {
 		/*
 		 * Turning on partition root requires setting the
 		 * CS_CPU_EXCLUSIVE bit implicitly as well and cpus_allowed
@@ -1991,31 +2016,27 @@ static int update_prstate(struct cpuset *cs, int val)
 			goto out;

 		err = update_parent_subparts_cpumask(cs, partcmd_enable,
-						     NULL, &tmp);
+						     NULL, &tmpmask);
 		if (err) {
 			update_flag(CS_CPU_EXCLUSIVE, cs, 0);
 			goto out;
 		}
-		cs->partition_root_state = PRS_ENABLED;
 	} else {
 		/*
 		 * Turning off partition root will clear the
 		 * CS_CPU_EXCLUSIVE bit.
 		 */
-		if (cs->partition_root_state == PRS_ERROR) {
-			cs->partition_root_state = 0;
+		if (old_prs == PRS_ERROR) {
 			update_flag(CS_CPU_EXCLUSIVE, cs, 0);
 			err = 0;
 			goto out;
 		}

 		err = update_parent_subparts_cpumask(cs, partcmd_disable,
-						     NULL, &tmp);
+						     NULL, &tmpmask);
 		if (err)
 			goto out;

-		cs->partition_root_state = 0;
-
 		/* Turning off CS_CPU_EXCLUSIVE will not return error */
 		update_flag(CS_CPU_EXCLUSIVE, cs, 0);
 	}
@@ -2028,11 +2049,18 @@ static int update_prstate(struct cpuset *cs, int val)
 	update_tasks_cpumask(parent);

 	if (parent->child_ecpus_count)
-		update_sibling_cpumasks(parent, cs, &tmp);
+		update_sibling_cpumasks(parent, cs, &tmpmask);

 	rebuild_sched_domains_locked();
 out:
-	free_cpumasks(NULL, &tmp);
+	if (!err) {
+		spin_lock_irq(&callback_lock);
+		cs->partition_root_state = new_prs;
+		spin_unlock_irq(&callback_lock);
+
+		notify_partition_change(cs, old_prs, new_prs);
+	}
+
+	free_cpumasks(NULL, &tmpmask);
 	return err;
 }
@@ -2293,7 +2321,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
 	cpuset_filetype_t type = cft->private;
 	int retval = 0;

-	get_online_cpus();
+	cpus_read_lock();
 	percpu_down_write(&cpuset_rwsem);
 	if (!is_cpuset_online(cs)) {
 		retval = -ENODEV;
@@ -2331,7 +2359,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
 	}
 out_unlock:
 	percpu_up_write(&cpuset_rwsem);
-	put_online_cpus();
+	cpus_read_unlock();
 	return retval;
 }
@@ -2342,7 +2370,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
 	cpuset_filetype_t type = cft->private;
 	int retval = -ENODEV;

-	get_online_cpus();
+	cpus_read_lock();
 	percpu_down_write(&cpuset_rwsem);
 	if (!is_cpuset_online(cs))
 		goto out_unlock;
@@ -2357,7 +2385,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
 	}
 out_unlock:
 	percpu_up_write(&cpuset_rwsem);
-	put_online_cpus();
+	cpus_read_unlock();
 	return retval;
 }
@@ -2396,7 +2424,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
 	kernfs_break_active_protection(of->kn);
 	flush_work(&cpuset_hotplug_work);

-	get_online_cpus();
+	cpus_read_lock();
 	percpu_down_write(&cpuset_rwsem);
 	if (!is_cpuset_online(cs))
 		goto out_unlock;
@@ -2422,7 +2450,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
 	free_cpuset(trialcs);
 out_unlock:
 	percpu_up_write(&cpuset_rwsem);
-	put_online_cpus();
+	cpus_read_unlock();
 	kernfs_unbreak_active_protection(of->kn);
 	css_put(&cs->css);
 	flush_workqueue(cpuset_migrate_mm_wq);
@@ -2553,7 +2581,7 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
 		return -EINVAL;

 	css_get(&cs->css);
-	get_online_cpus();
+	cpus_read_lock();
 	percpu_down_write(&cpuset_rwsem);
 	if (!is_cpuset_online(cs))
 		goto out_unlock;
@@ -2561,7 +2589,7 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
 	retval = update_prstate(cs, val);
 out_unlock:
 	percpu_up_write(&cpuset_rwsem);
-	put_online_cpus();
+	cpus_read_unlock();
 	css_put(&cs->css);
 	return retval ?: nbytes;
 }
@@ -2713,6 +2741,7 @@ static struct cftype dfl_files[] = {
 		.write = sched_partition_write,
 		.private = FILE_PARTITION_ROOT,
 		.flags = CFTYPE_NOT_ON_ROOT,
+		.file_offset = offsetof(struct cpuset, partition_file),
 	},

 	{
@@ -2748,12 +2777,16 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
 		return ERR_PTR(-ENOMEM);
 	}

-	set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
+	__set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
 	nodes_clear(cs->mems_allowed);
 	nodes_clear(cs->effective_mems);
 	fmeter_init(&cs->fmeter);
 	cs->relax_domain_level = -1;

+	/* Set CS_MEMORY_MIGRATE for default hierarchy */
+	if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
+		__set_bit(CS_MEMORY_MIGRATE, &cs->flags);
+
 	return &cs->css;
 }
@@ -2767,7 +2800,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 	if (!parent)
 		return 0;

-	get_online_cpus();
+	cpus_read_lock();
 	percpu_down_write(&cpuset_rwsem);

 	set_bit(CS_ONLINE, &cs->flags);
@@ -2820,7 +2853,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 	spin_unlock_irq(&callback_lock);
 out_unlock:
 	percpu_up_write(&cpuset_rwsem);
-	put_online_cpus();
+	cpus_read_unlock();
 	return 0;
 }
@@ -2839,7 +2872,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
 {
 	struct cpuset *cs = css_cs(css);

-	get_online_cpus();
+	cpus_read_lock();
 	percpu_down_write(&cpuset_rwsem);

 	if (is_partition_root(cs))
@@ -2860,7 +2893,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
 	clear_bit(CS_ONLINE, &cs->flags);

 	percpu_up_write(&cpuset_rwsem);
-	put_online_cpus();
+	cpus_read_unlock();
 }

 static void cpuset_css_free(struct cgroup_subsys_state *css)
@@ -3093,8 +3126,10 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
 	if (is_partition_root(cs) && (cpumask_empty(&new_cpus) ||
 	   (parent->partition_root_state == PRS_ERROR))) {
 		if (cs->nr_subparts_cpus) {
+			spin_lock_irq(&callback_lock);
 			cs->nr_subparts_cpus = 0;
 			cpumask_clear(cs->subparts_cpus);
+			spin_unlock_irq(&callback_lock);
 			compute_effective_cpumask(&new_cpus, cs, parent);
 		}
@@ -3106,9 +3141,17 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
 		 */
 		if ((parent->partition_root_state == PRS_ERROR) ||
 		     cpumask_empty(&new_cpus)) {
+			int old_prs;
+
 			update_parent_subparts_cpumask(cs, partcmd_disable,
 						       NULL, tmp);
-			cs->partition_root_state = PRS_ERROR;
+			old_prs = cs->partition_root_state;
+			if (old_prs != PRS_ERROR) {
+				spin_lock_irq(&callback_lock);
+				cs->partition_root_state = PRS_ERROR;
+				spin_unlock_irq(&callback_lock);
+				notify_partition_change(cs, old_prs, PRS_ERROR);
+			}
 		}
 		cpuset_force_rebuild();
 	}
@@ -3179,6 +3222,13 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 	cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus);
 	mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems);

+	/*
+	 * In the rare case that hotplug removes all the cpus in subparts_cpus,
+	 * we assumed that cpus are updated.
+	 */
+	if (!cpus_updated && top_cpuset.nr_subparts_cpus)
+		cpus_updated = true;
+
 	/* synchronize cpus_allowed to cpu_active_mask */
 	if (cpus_updated) {
 		spin_lock_irq(&callback_lock);

...
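The partition_file handle wired up above is what makes the new notification observable from user space: cgroup_file_notify() feeds kernfs_notify(), which wakes pollers of "cpuset.cpus.partition" with POLLPRI. A sketch of a userspace consumer; the cgroup path is an assumption of this example:

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical partition root created by the administrator. */
	const char *path = "/sys/fs/cgroup/demo/cpuset.cpus.partition";
	char buf[64];
	int fd = open(path, O_RDONLY);

	if (fd < 0) {
		perror(path);
		return 1;
	}

	for (;;) {
		struct pollfd pfd = { .fd = fd, .events = POLLPRI };
		ssize_t n;

		/* kernfs delivers the notification as POLLPRI|POLLERR. */
		if (poll(&pfd, 1, -1) < 0)
			break;

		/* Re-read from the start to pick up the new state. */
		lseek(fd, 0, SEEK_SET);
		n = read(fd, buf, sizeof(buf) - 1);
		if (n <= 0)
			break;
		buf[n] = '\0';
		printf("partition state now: %s", buf);
	}
	close(fd);
	return 0;
}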