Commit 0c7f293e authored by Waiman Long, committed by Tejun Heo

cgroup/cpuset: Add cpuset.cpus.exclusive.effective for v2

The creation of a cpuset partition means dedicating a set of exclusive
CPUs to be used by a particular partition only. These exclusive CPUs
will not be used by any cpusets outside of that partition.

To enable more flexibility in creating partitions, we need a way to
distribute exclusive CPUs that can be used in new partitions. Currently,
we have a subparts_cpus cpumask in struct cpuset that tracks only
the exclusive CPUs used by all the sub-partitions underneath a given
cpuset.

This patch reworks the way exclusive CPUs are tracked. The subparts_cpus
cpumask is renamed to effective_xcpus, which tracks the exclusive CPUs
allocated to a partition root, including those that are further
distributed down to sub-partitions underneath it. IOW, it also covers
the exclusive CPUs used by the partition root itself. Note that
effective_xcpus can contain offline CPUs and will always be a subset of
cpus_allowed.
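
To make the relationship concrete, here is a minimal userspace sketch of
the bookkeeping described above. CPU masks are modelled as 64-bit words
and all helper names are illustrative only; the kernel uses the cpumask
API, with the defaulting done in update_prstate() and the effective_cpus
derivation in compute_partition_effective_cpumask() further down in this
patch.

/* Simplified model: effective_xcpus defaults to cpus_allowed limited by
 * what the parent can hand out, and the effective_cpus of a valid
 * partition root is effective_xcpus minus CPUs given to sub-partitions
 * and minus offline CPUs.  Illustrative only, not kernel code.
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t mask_t;

static mask_t make_effective_xcpus(mask_t cpus_allowed, mask_t parent_xcpus)
{
	return cpus_allowed & parent_xcpus;	/* always a subset of cpus_allowed */
}

static mask_t make_effective_cpus(mask_t xcpus, mask_t subparts, mask_t online)
{
	return (xcpus & ~subparts) & online;	/* may shrink when CPUs go offline */
}

int main(void)
{
	mask_t parent_xcpus = 0xff;	/* parent exclusively owns CPUs 0-7 */
	mask_t cpus_allowed = 0xf0;	/* child requests CPUs 4-7 */
	mask_t online       = 0x7f;	/* CPU 7 is offline */

	mask_t xcpus = make_effective_xcpus(cpus_allowed, parent_xcpus);
	mask_t ecpus = make_effective_cpus(xcpus, 0, online);

	printf("effective_xcpus=%#llx effective_cpus=%#llx\n",
	       (unsigned long long)xcpus, (unsigned long long)ecpus);
	return 0;
}

In this example effective_xcpus (0xf0) keeps the offline CPU 7 while
effective_cpus (0x70) drops it, matching the note above that
effective_xcpus may contain offline CPUs.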

The renamed effective_xcpus is now exposed via a new read-only
"cpuset.cpus.exclusive.effective" control file. The new effective_xcpus
cpumask should be set to cpus_allowed when a cpuset becomes a partition
root and be cleared if it is not a valid partition root.
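
As a usage sketch, the new read-only file can be read like any other
cgroup v2 control file. The mount point and cgroup name below are
assumptions for illustration only; for a cpuset that is not a valid
partition root the file is expected to show an empty list, since
effective_xcpus is cleared in that case.

#include <stdio.h>

int main(void)
{
	/* Assumed paths: cgroup v2 mounted at /sys/fs/cgroup, cgroup "part1" */
	const char *path = "/sys/fs/cgroup/part1/cpuset.cpus.exclusive.effective";
	char buf[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("exclusive effective CPUs: %s", buf);
	fclose(f);
	return 0;
}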

In the next patch, we will add another new control file that can be
written to, giving further control over what goes into effective_xcpus.
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
parent 6fcdb018
...@@ -78,7 +78,7 @@ enum prs_errcode { ...@@ -78,7 +78,7 @@ enum prs_errcode {
}; };
static const char * const perr_strings[] = { static const char * const perr_strings[] = {
[PERR_INVCPUS] = "Invalid cpu list in cpuset.cpus", [PERR_INVCPUS] = "Invalid cpu list in cpuset.cpus.exclusive",
[PERR_INVPARENT] = "Parent is an invalid partition root", [PERR_INVPARENT] = "Parent is an invalid partition root",
[PERR_NOTPART] = "Parent is not a partition root", [PERR_NOTPART] = "Parent is not a partition root",
[PERR_NOTEXCL] = "Cpu list in cpuset.cpus not exclusive", [PERR_NOTEXCL] = "Cpu list in cpuset.cpus not exclusive",
...@@ -121,14 +121,18 @@ struct cpuset { ...@@ -121,14 +121,18 @@ struct cpuset {
nodemask_t effective_mems; nodemask_t effective_mems;
/* /*
* CPUs allocated to child sub-partitions (default hierarchy only) * Exclusive CPUs dedicated to current cgroup (default hierarchy only)
* - CPUs granted by the parent = effective_cpus U subparts_cpus
* - effective_cpus and subparts_cpus are mutually exclusive.
* *
* effective_cpus contains only onlined CPUs, but subparts_cpus * These exclusive CPUs must be a subset of cpus_allowed. A parent
* may have offlined ones. * cgroup can only grant exclusive CPUs to one of its children.
*
* When the cgroup becomes a valid partition root, effective_xcpus
* defaults to cpus_allowed if not set. The effective_cpus of a valid
* partition root comes solely from its effective_xcpus and some of the
* effective_xcpus may be distributed to sub-partitions below & hence
* excluded from its effective_cpus.
*/ */
cpumask_var_t subparts_cpus; cpumask_var_t effective_xcpus;
/* /*
* This is old Memory Nodes tasks took on. * This is old Memory Nodes tasks took on.
...@@ -156,8 +160,8 @@ struct cpuset { ...@@ -156,8 +160,8 @@ struct cpuset {
/* for custom sched domain */ /* for custom sched domain */
int relax_domain_level; int relax_domain_level;
/* number of CPUs in subparts_cpus */ /* number of valid sub-partitions */
int nr_subparts_cpus; int nr_subparts;
/* partition root state */ /* partition root state */
int partition_root_state; int partition_root_state;
...@@ -185,6 +189,11 @@ struct cpuset { ...@@ -185,6 +189,11 @@ struct cpuset {
struct cgroup_file partition_file; struct cgroup_file partition_file;
}; };
/*
* Exclusive CPUs distributed out to sub-partitions of top_cpuset
*/
static cpumask_var_t subpartitions_cpus;
/* /*
* Partition root states: * Partition root states:
* *
...@@ -312,7 +321,7 @@ static inline int is_partition_invalid(const struct cpuset *cs) ...@@ -312,7 +321,7 @@ static inline int is_partition_invalid(const struct cpuset *cs)
*/ */
static inline void make_partition_invalid(struct cpuset *cs) static inline void make_partition_invalid(struct cpuset *cs)
{ {
if (is_partition_valid(cs)) if (cs->partition_root_state > 0)
cs->partition_root_state = -cs->partition_root_state; cs->partition_root_state = -cs->partition_root_state;
} }
...@@ -469,7 +478,7 @@ static inline bool partition_is_populated(struct cpuset *cs, ...@@ -469,7 +478,7 @@ static inline bool partition_is_populated(struct cpuset *cs,
if (cs->css.cgroup->nr_populated_csets) if (cs->css.cgroup->nr_populated_csets)
return true; return true;
if (!excluded_child && !cs->nr_subparts_cpus) if (!excluded_child && !cs->nr_subparts)
return cgroup_is_populated(cs->css.cgroup); return cgroup_is_populated(cs->css.cgroup);
rcu_read_lock(); rcu_read_lock();
...@@ -601,7 +610,7 @@ static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp) ...@@ -601,7 +610,7 @@ static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
if (cs) { if (cs) {
pmask1 = &cs->cpus_allowed; pmask1 = &cs->cpus_allowed;
pmask2 = &cs->effective_cpus; pmask2 = &cs->effective_cpus;
pmask3 = &cs->subparts_cpus; pmask3 = &cs->effective_xcpus;
} else { } else {
pmask1 = &tmp->new_cpus; pmask1 = &tmp->new_cpus;
pmask2 = &tmp->addmask; pmask2 = &tmp->addmask;
...@@ -636,7 +645,7 @@ static inline void free_cpumasks(struct cpuset *cs, struct tmpmasks *tmp) ...@@ -636,7 +645,7 @@ static inline void free_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
if (cs) { if (cs) {
free_cpumask_var(cs->cpus_allowed); free_cpumask_var(cs->cpus_allowed);
free_cpumask_var(cs->effective_cpus); free_cpumask_var(cs->effective_cpus);
free_cpumask_var(cs->subparts_cpus); free_cpumask_var(cs->effective_xcpus);
} }
if (tmp) { if (tmp) {
free_cpumask_var(tmp->new_cpus); free_cpumask_var(tmp->new_cpus);
...@@ -664,6 +673,7 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs) ...@@ -664,6 +673,7 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
cpumask_copy(trial->cpus_allowed, cs->cpus_allowed); cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
cpumask_copy(trial->effective_cpus, cs->effective_cpus); cpumask_copy(trial->effective_cpus, cs->effective_cpus);
cpumask_copy(trial->effective_xcpus, cs->effective_xcpus);
return trial; return trial;
} }
...@@ -677,6 +687,25 @@ static inline void free_cpuset(struct cpuset *cs) ...@@ -677,6 +687,25 @@ static inline void free_cpuset(struct cpuset *cs)
kfree(cs); kfree(cs);
} }
/*
* cpu_exclusive_check() - check if two cpusets are exclusive
*
* Return 0 if exclusive, -EINVAL if not
*/
static inline int cpu_exclusive_check(struct cpuset *cs1, struct cpuset *cs2)
{
struct cpumask *cpus1, *cpus2;
cpus1 = cpumask_empty(cs1->effective_xcpus)
? cs1->cpus_allowed : cs1->effective_xcpus;
cpus2 = cpumask_empty(cs2->effective_xcpus)
? cs2->cpus_allowed : cs2->effective_xcpus;
if (cpumask_intersects(cpus1, cpus2))
return -EINVAL;
return 0;
}
/* /*
* validate_change_legacy() - Validate conditions specific to legacy (v1) * validate_change_legacy() - Validate conditions specific to legacy (v1)
* behavior. * behavior.
...@@ -776,9 +805,10 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) ...@@ -776,9 +805,10 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
ret = -EINVAL; ret = -EINVAL;
cpuset_for_each_child(c, css, par) { cpuset_for_each_child(c, css, par) {
if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) && if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
c != cur && c != cur) {
cpumask_intersects(trial->cpus_allowed, c->cpus_allowed)) if (cpu_exclusive_check(trial, c))
goto out; goto out;
}
if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) && if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
c != cur && c != cur &&
nodes_intersects(trial->mems_allowed, c->mems_allowed)) nodes_intersects(trial->mems_allowed, c->mems_allowed))
...@@ -908,7 +938,7 @@ static int generate_sched_domains(cpumask_var_t **domains, ...@@ -908,7 +938,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
csa = NULL; csa = NULL;
/* Special case for the 99% of systems with one, full, sched domain */ /* Special case for the 99% of systems with one, full, sched domain */
if (root_load_balance && !top_cpuset.nr_subparts_cpus) { if (root_load_balance && !top_cpuset.nr_subparts) {
ndoms = 1; ndoms = 1;
doms = alloc_sched_domains(ndoms); doms = alloc_sched_domains(ndoms);
if (!doms) if (!doms)
...@@ -1159,7 +1189,7 @@ static void rebuild_sched_domains_locked(void) ...@@ -1159,7 +1189,7 @@ static void rebuild_sched_domains_locked(void)
* should be the same as the active CPUs, so checking only top_cpuset * should be the same as the active CPUs, so checking only top_cpuset
* is enough to detect racing CPU offlines. * is enough to detect racing CPU offlines.
*/ */
if (!top_cpuset.nr_subparts_cpus && if (cpumask_empty(subpartitions_cpus) &&
!cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask)) !cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
return; return;
...@@ -1168,7 +1198,7 @@ static void rebuild_sched_domains_locked(void) ...@@ -1168,7 +1198,7 @@ static void rebuild_sched_domains_locked(void)
* root should be only a subset of the active CPUs. Since a CPU in any * root should be only a subset of the active CPUs. Since a CPU in any
* partition root could be offlined, all must be checked. * partition root could be offlined, all must be checked.
*/ */
if (top_cpuset.nr_subparts_cpus) { if (top_cpuset.nr_subparts) {
rcu_read_lock(); rcu_read_lock();
cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
if (!is_partition_valid(cs)) { if (!is_partition_valid(cs)) {
...@@ -1232,7 +1262,7 @@ static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus) ...@@ -1232,7 +1262,7 @@ static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus)
*/ */
if (kthread_is_per_cpu(task)) if (kthread_is_per_cpu(task))
continue; continue;
cpumask_andnot(new_cpus, possible_mask, cs->subparts_cpus); cpumask_andnot(new_cpus, possible_mask, subpartitions_cpus);
} else { } else {
cpumask_and(new_cpus, possible_mask, cs->effective_cpus); cpumask_and(new_cpus, possible_mask, cs->effective_cpus);
} }
...@@ -1247,31 +1277,21 @@ static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus) ...@@ -1247,31 +1277,21 @@ static void update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus)
* @cs: the cpuset the need to recompute the new effective_cpus mask * @cs: the cpuset the need to recompute the new effective_cpus mask
* @parent: the parent cpuset * @parent: the parent cpuset
* *
* If the parent has subpartition CPUs, include them in the list of * The result is valid only if the given cpuset isn't a partition root.
* allowable CPUs in computing the new effective_cpus mask. Since offlined
* CPUs are not removed from subparts_cpus, we have to use cpu_active_mask
* to mask those out.
*/ */
static void compute_effective_cpumask(struct cpumask *new_cpus, static void compute_effective_cpumask(struct cpumask *new_cpus,
struct cpuset *cs, struct cpuset *parent) struct cpuset *cs, struct cpuset *parent)
{ {
if (parent->nr_subparts_cpus && is_partition_valid(cs)) {
cpumask_or(new_cpus, parent->effective_cpus,
parent->subparts_cpus);
cpumask_and(new_cpus, new_cpus, cs->cpus_allowed);
cpumask_and(new_cpus, new_cpus, cpu_active_mask);
} else {
cpumask_and(new_cpus, cs->cpus_allowed, parent->effective_cpus); cpumask_and(new_cpus, cs->cpus_allowed, parent->effective_cpus);
}
} }
/* /*
* Commands for update_parent_subparts_cpumask * Commands for update_parent_effective_cpumask
*/ */
enum subparts_cmd { enum partition_cmd {
partcmd_enable, /* Enable partition root */ partcmd_enable, /* Enable partition root */
partcmd_disable, /* Disable partition root */ partcmd_disable, /* Disable partition root */
partcmd_update, /* Update parent's subparts_cpus */ partcmd_update, /* Update parent's effective_cpus */
partcmd_invalidate, /* Make partition invalid */ partcmd_invalidate, /* Make partition invalid */
}; };
...@@ -1333,8 +1353,23 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs) ...@@ -1333,8 +1353,23 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs)
rebuild_sched_domains_locked(); rebuild_sched_domains_locked();
} }
/*
* tasks_nocpu_error - Return true if tasks will have no effective_cpus
*/
static bool tasks_nocpu_error(struct cpuset *parent, struct cpuset *cs,
struct cpumask *xcpus)
{
/*
* A populated partition (cs or parent) can't have empty effective_cpus
*/
return (cpumask_subset(parent->effective_cpus, xcpus) &&
partition_is_populated(parent, cs)) ||
(!cpumask_intersects(xcpus, cpu_active_mask) &&
partition_is_populated(cs, NULL));
}
/** /**
* update_parent_subparts_cpumask - update subparts_cpus mask of parent cpuset * update_parent_effective_cpumask - update effective_cpus mask of parent cpuset
* @cs: The cpuset that requests change in partition root state * @cs: The cpuset that requests change in partition root state
* @cmd: Partition root state change command * @cmd: Partition root state change command
* @newmask: Optional new cpumask for partcmd_update * @newmask: Optional new cpumask for partcmd_update
...@@ -1342,21 +1377,20 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs) ...@@ -1342,21 +1377,20 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs)
* Return: 0 or a partition root state error code * Return: 0 or a partition root state error code
* *
* For partcmd_enable, the cpuset is being transformed from a non-partition * For partcmd_enable, the cpuset is being transformed from a non-partition
* root to a partition root. The cpus_allowed mask of the given cpuset will * root to a partition root. The effective_xcpus (cpus_allowed if effective_xcpus
* be put into parent's subparts_cpus and taken away from parent's * not set) mask of the given cpuset will be taken away from parent's
* effective_cpus. The function will return 0 if all the CPUs listed in * effective_cpus. The function will return 0 if all the CPUs listed in
* cpus_allowed can be granted or an error code will be returned. * effective_xcpus can be granted or an error code will be returned.
* *
* For partcmd_disable, the cpuset is being transformed from a partition * For partcmd_disable, the cpuset is being transformed from a partition
* root back to a non-partition root. Any CPUs in cpus_allowed that are in * root back to a non-partition root. Any CPUs in effective_xcpus will be
* parent's subparts_cpus will be taken away from that cpumask and put back * given back to parent's effective_cpus. 0 will always be returned.
* into parent's effective_cpus. 0 will always be returned.
* *
* For partcmd_update, if the optional newmask is specified, the cpu list is * For partcmd_update, if the optional newmask is specified, the cpu list is
* to be changed from cpus_allowed to newmask. Otherwise, cpus_allowed is * to be changed from effective_xcpus to newmask. Otherwise, effective_xcpus is
* assumed to remain the same. The cpuset should either be a valid or invalid * assumed to remain the same. The cpuset should either be a valid or invalid
* partition root. The partition root state may change from valid to invalid * partition root. The partition root state may change from valid to invalid
* or vice versa. An error code will only be returned if transitioning from * or vice versa. An error code will be returned if transitioning from
* invalid to valid violates the exclusivity rule. * invalid to valid violates the exclusivity rule.
* *
* For partcmd_invalidate, the current partition will be made invalid. * For partcmd_invalidate, the current partition will be made invalid.
...@@ -1371,18 +1405,47 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs) ...@@ -1371,18 +1405,47 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs)
* check for error and so partition_root_state and prs_error will be updated * check for error and so partition_root_state and prs_error will be updated
* directly. * directly.
*/ */
static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
struct cpumask *newmask, struct cpumask *newmask,
struct tmpmasks *tmp) struct tmpmasks *tmp)
{ {
struct cpuset *parent = parent_cs(cs); struct cpuset *parent = parent_cs(cs);
int adding; /* Moving cpus from effective_cpus to subparts_cpus */ int adding; /* Adding cpus to parent's effective_cpus */
int deleting; /* Moving cpus from subparts_cpus to effective_cpus */ int deleting; /* Deleting cpus from parent's effective_cpus */
int old_prs, new_prs; int old_prs, new_prs;
int part_error = PERR_NONE; /* Partition error? */ int part_error = PERR_NONE; /* Partition error? */
int subparts_delta = 0;
struct cpumask *xcpus; /* cs effective_xcpus */
bool nocpu;
lockdep_assert_held(&cpuset_mutex); lockdep_assert_held(&cpuset_mutex);
/*
* new_prs will only be changed for the partcmd_update and
* partcmd_invalidate commands.
*/
adding = deleting = false;
old_prs = new_prs = cs->partition_root_state;
xcpus = !cpumask_empty(cs->effective_xcpus)
? cs->effective_xcpus : cs->cpus_allowed;
if (cmd == partcmd_invalidate) {
if (is_prs_invalid(old_prs))
return 0;
/*
* Make the current partition invalid.
*/
if (is_partition_valid(parent))
adding = cpumask_and(tmp->addmask,
xcpus, parent->effective_xcpus);
if (old_prs > 0) {
new_prs = -old_prs;
subparts_delta--;
}
goto write_error;
}
/* /*
* The parent must be a partition root. * The parent must be a partition root.
* The new cpumask, if present, or the current cpus_allowed must * The new cpumask, if present, or the current cpus_allowed must
...@@ -1395,124 +1458,124 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, ...@@ -1395,124 +1458,124 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
if (!newmask && cpumask_empty(cs->cpus_allowed)) if (!newmask && cpumask_empty(cs->cpus_allowed))
return PERR_CPUSEMPTY; return PERR_CPUSEMPTY;
/* nocpu = tasks_nocpu_error(parent, cs, xcpus);
* new_prs will only be changed for the partcmd_update and
* partcmd_invalidate commands.
*/
adding = deleting = false;
old_prs = new_prs = cs->partition_root_state;
if (cmd == partcmd_enable) { if (cmd == partcmd_enable) {
/* /*
* Enabling partition root is not allowed if cpus_allowed * Enabling partition root is not allowed if its
* doesn't overlap parent's cpus_allowed. * effective_xcpus is empty or doesn't overlap with
* parent's effective_xcpus.
*/ */
if (!cpumask_intersects(cs->cpus_allowed, parent->cpus_allowed)) if (cpumask_empty(xcpus) ||
!cpumask_intersects(xcpus, parent->effective_xcpus))
return PERR_INVCPUS; return PERR_INVCPUS;
/* /*
* A parent can be left with no CPU as long as there is no * A parent can be left with no CPU as long as there is no
* task directly associated with the parent partition. * task directly associated with the parent partition.
*/ */
if (cpumask_subset(parent->effective_cpus, cs->cpus_allowed) && if (nocpu)
partition_is_populated(parent, cs))
return PERR_NOCPUS; return PERR_NOCPUS;
cpumask_copy(tmp->addmask, cs->cpus_allowed); cpumask_copy(tmp->delmask, xcpus);
adding = true; deleting = true;
subparts_delta++;
} else if (cmd == partcmd_disable) { } else if (cmd == partcmd_disable) {
/* /*
* Need to remove cpus from parent's subparts_cpus for valid * May need to add cpus to parent's effective_cpus for
* partition root. * valid partition root.
*/ */
deleting = !is_prs_invalid(old_prs) && adding = !is_prs_invalid(old_prs) &&
cpumask_and(tmp->delmask, cs->cpus_allowed, cpumask_and(tmp->addmask, xcpus, parent->effective_xcpus);
parent->subparts_cpus); if (adding)
} else if (cmd == partcmd_invalidate) { subparts_delta--;
if (is_prs_invalid(old_prs)) } else if (newmask) {
return 0;
/* /*
* Make the current partition invalid. It is assumed that * Empty cpumask is not allowed
* invalidation is caused by violating cpu exclusivity rule.
*/ */
deleting = cpumask_and(tmp->delmask, cs->cpus_allowed, if (cpumask_empty(newmask)) {
parent->subparts_cpus); part_error = PERR_CPUSEMPTY;
if (old_prs > 0) { goto write_error;
new_prs = -old_prs;
part_error = PERR_NOTEXCL;
} }
} else if (newmask) {
/* /*
* partcmd_update with newmask: * partcmd_update with newmask:
* *
* Compute add/delete mask to/from subparts_cpus * Compute add/delete mask to/from effective_cpus
* *
* delmask = cpus_allowed & ~newmask & parent->subparts_cpus * addmask = effective_xcpus & ~newmask & parent->effective_xcpus
* addmask = newmask & parent->cpus_allowed * delmask = newmask & ~cs->effective_xcpus
* & ~parent->subparts_cpus * & parent->effective_xcpus
*/ */
cpumask_andnot(tmp->delmask, cs->cpus_allowed, newmask); cpumask_andnot(tmp->addmask, xcpus, newmask);
deleting = cpumask_and(tmp->delmask, tmp->delmask, adding = cpumask_and(tmp->addmask, tmp->addmask,
parent->subparts_cpus); parent->effective_xcpus);
cpumask_and(tmp->addmask, newmask, parent->cpus_allowed); cpumask_andnot(tmp->delmask, newmask, xcpus);
adding = cpumask_andnot(tmp->addmask, tmp->addmask, deleting = cpumask_and(tmp->delmask, tmp->delmask,
parent->subparts_cpus); parent->effective_xcpus);
/*
* Empty cpumask is not allowed
*/
if (cpumask_empty(newmask)) {
part_error = PERR_CPUSEMPTY;
/* /*
* Make partition invalid if parent's effective_cpus could * Make partition invalid if parent's effective_cpus could
* become empty and there are tasks in the parent. * become empty and there are tasks in the parent.
*/ */
} else if (adding && if (nocpu && (!adding ||
cpumask_subset(parent->effective_cpus, tmp->addmask) && !cpumask_intersects(tmp->addmask, cpu_active_mask))) {
!cpumask_intersects(tmp->delmask, cpu_active_mask) &&
partition_is_populated(parent, cs)) {
part_error = PERR_NOCPUS; part_error = PERR_NOCPUS;
adding = false; deleting = false;
deleting = cpumask_and(tmp->delmask, cs->cpus_allowed, adding = cpumask_and(tmp->addmask,
parent->subparts_cpus); xcpus, parent->effective_xcpus);
} }
} else { } else {
/* /*
* partcmd_update w/o newmask: * partcmd_update w/o newmask
*
* delmask = effective_xcpus & parent->effective_cpus
* *
* delmask = cpus_allowed & parent->subparts_cpus * This can be called from:
* addmask = cpus_allowed & parent->cpus_allowed * 1) update_cpumasks_hier()
* & ~parent->subparts_cpus * 2) cpuset_hotplug_update_tasks()
* *
* This gets invoked either due to a hotplug event or from * Check to see if it can be transitioned from valid to
* update_cpumasks_hier(). This can cause the state of a * invalid partition or vice versa.
* partition root to transition from valid to invalid or vice *
* versa. So we still need to compute the addmask and delmask. * A partition error happens when parent has tasks and all
* its effective CPUs will have to be distributed out.
* A partition error happens when: */
* 1) Cpuset is valid partition, but parent does not distribute WARN_ON_ONCE(!is_partition_valid(parent));
* out any CPUs. if (nocpu) {
* 2) Parent has tasks and all its effective CPUs will have
* to be distributed out.
*/
cpumask_and(tmp->addmask, cs->cpus_allowed,
parent->cpus_allowed);
adding = cpumask_andnot(tmp->addmask, tmp->addmask,
parent->subparts_cpus);
if ((is_partition_valid(cs) && !parent->nr_subparts_cpus) ||
(adding &&
cpumask_subset(parent->effective_cpus, tmp->addmask) &&
partition_is_populated(parent, cs))) {
part_error = PERR_NOCPUS; part_error = PERR_NOCPUS;
adding = false; if (is_partition_valid(cs))
} adding = cpumask_and(tmp->addmask,
xcpus, parent->effective_xcpus);
} else if (is_partition_invalid(cs) &&
cpumask_subset(xcpus, parent->effective_xcpus)) {
struct cgroup_subsys_state *css;
struct cpuset *child;
bool exclusive = true;
if (part_error && is_partition_valid(cs) && /*
parent->nr_subparts_cpus) * Convert invalid partition to valid has to
deleting = cpumask_and(tmp->delmask, cs->cpus_allowed, * pass the cpu exclusivity test.
parent->subparts_cpus); */
rcu_read_lock();
cpuset_for_each_child(child, css, parent) {
if (child == cs)
continue;
if (cpu_exclusive_check(cs, child)) {
exclusive = false;
break;
} }
}
rcu_read_unlock();
if (exclusive)
deleting = cpumask_and(tmp->delmask,
xcpus, parent->effective_cpus);
else
part_error = PERR_NOTEXCL;
}
}
write_error:
if (part_error) if (part_error)
WRITE_ONCE(cs->prs_err, part_error); WRITE_ONCE(cs->prs_err, part_error);
...@@ -1524,13 +1587,17 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, ...@@ -1524,13 +1587,17 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
switch (cs->partition_root_state) { switch (cs->partition_root_state) {
case PRS_ROOT: case PRS_ROOT:
case PRS_ISOLATED: case PRS_ISOLATED:
if (part_error) if (part_error) {
new_prs = -old_prs; new_prs = -old_prs;
subparts_delta--;
}
break; break;
case PRS_INVALID_ROOT: case PRS_INVALID_ROOT:
case PRS_INVALID_ISOLATED: case PRS_INVALID_ISOLATED:
if (!part_error) if (!part_error) {
new_prs = -old_prs; new_prs = -old_prs;
subparts_delta++;
}
break; break;
} }
} }
...@@ -1550,32 +1617,43 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, ...@@ -1550,32 +1617,43 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
} }
/* /*
* Change the parent's subparts_cpus. * Change the parent's effective_cpus & effective_xcpus (top cpuset
* only).
*
* Newly added CPUs will be removed from effective_cpus and * Newly added CPUs will be removed from effective_cpus and
* newly deleted ones will be added back to effective_cpus. * newly deleted ones will be added back to effective_cpus.
*/ */
spin_lock_irq(&callback_lock); spin_lock_irq(&callback_lock);
if (adding) { if (adding) {
cpumask_or(parent->subparts_cpus, if (parent == &top_cpuset)
parent->subparts_cpus, tmp->addmask); cpumask_andnot(subpartitions_cpus,
cpumask_andnot(parent->effective_cpus, subpartitions_cpus, tmp->addmask);
parent->effective_cpus, tmp->addmask);
}
if (deleting) {
cpumask_andnot(parent->subparts_cpus,
parent->subparts_cpus, tmp->delmask);
/* /*
* Some of the CPUs in subparts_cpus might have been offlined. * Some of the CPUs in effective_xcpus might have been offlined.
*/ */
cpumask_and(tmp->delmask, tmp->delmask, cpu_active_mask);
cpumask_or(parent->effective_cpus, cpumask_or(parent->effective_cpus,
parent->effective_cpus, tmp->addmask);
cpumask_and(parent->effective_cpus,
parent->effective_cpus, cpu_active_mask);
}
if (deleting) {
if (parent == &top_cpuset)
cpumask_or(subpartitions_cpus,
subpartitions_cpus, tmp->delmask);
cpumask_andnot(parent->effective_cpus,
parent->effective_cpus, tmp->delmask); parent->effective_cpus, tmp->delmask);
} }
parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus); if (is_partition_valid(parent)) {
parent->nr_subparts += subparts_delta;
WARN_ON_ONCE(parent->nr_subparts < 0);
}
if (old_prs != new_prs) if (old_prs != new_prs) {
cs->partition_root_state = new_prs; cs->partition_root_state = new_prs;
if (new_prs <= 0)
cs->nr_subparts = 0;
}
spin_unlock_irq(&callback_lock); spin_unlock_irq(&callback_lock);
...@@ -1600,6 +1678,71 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, ...@@ -1600,6 +1678,71 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
return 0; return 0;
} }
/**
* compute_partition_effective_cpumask - compute effective_cpus for partition
* @cs: partition root cpuset
* @new_ecpus: previously computed effective_cpus to be updated
*
* Compute the effective_cpus of a partition root by scanning effective_xcpus
* of child partition roots and excluding their effective_xcpus.
*
* This has the side effect of invalidating valid child partition roots,
* if necessary. Since it is called from either cpuset_hotplug_update_tasks()
* or update_cpumasks_hier() where parent and children are modified
* successively, we don't need to call update_parent_effective_cpumask()
* and the child's effective_cpus will be updated in later iterations.
*
* Note that rcu_read_lock() is assumed to be held.
*/
static void compute_partition_effective_cpumask(struct cpuset *cs,
struct cpumask *new_ecpus)
{
struct cgroup_subsys_state *css;
struct cpuset *child;
bool populated = partition_is_populated(cs, NULL);
/*
* Check child partition roots to see if they should be
* invalidated when
* 1) child effective_xcpus not a subset of new
* exclusive_cpus
* 2) All the effective_cpus will be used up and cp
* has tasks
*/
cpumask_and(new_ecpus, cs->effective_xcpus, cpu_active_mask);
rcu_read_lock();
cpuset_for_each_child(child, css, cs) {
if (!is_partition_valid(child))
continue;
child->prs_err = 0;
if (!cpumask_subset(child->effective_xcpus,
cs->effective_xcpus))
child->prs_err = PERR_INVCPUS;
else if (populated &&
cpumask_subset(new_ecpus, child->effective_xcpus))
child->prs_err = PERR_NOCPUS;
if (child->prs_err) {
int old_prs = child->partition_root_state;
/*
* Invalidate child partition
*/
spin_lock_irq(&callback_lock);
make_partition_invalid(child);
cs->nr_subparts--;
child->nr_subparts = 0;
spin_unlock_irq(&callback_lock);
notify_partition_change(child, old_prs);
continue;
}
cpumask_andnot(new_ecpus, new_ecpus,
child->effective_xcpus);
}
rcu_read_unlock();
}
/* /*
* update_cpumasks_hier() flags * update_cpumasks_hier() flags
*/ */
...@@ -1634,6 +1777,19 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, ...@@ -1634,6 +1777,19 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
compute_effective_cpumask(tmp->new_cpus, cp, parent); compute_effective_cpumask(tmp->new_cpus, cp, parent);
if (is_partition_valid(parent) && is_partition_valid(cp))
compute_partition_effective_cpumask(cp, tmp->new_cpus);
/*
* A partition with no effective_cpus is allowed as long as
* there is no task associated with it. Call
* update_parent_effective_cpumask() to check it.
*/
if (is_partition_valid(cp) && cpumask_empty(tmp->new_cpus)) {
update_parent = true;
goto update_parent_effective;
}
/* /*
* If it becomes empty, inherit the effective mask of the * If it becomes empty, inherit the effective mask of the
* parent, which is guaranteed to have some CPUs unless * parent, which is guaranteed to have some CPUs unless
...@@ -1641,10 +1797,6 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, ...@@ -1641,10 +1797,6 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
* out all its CPUs. * out all its CPUs.
*/ */
if (is_in_v2_mode() && cpumask_empty(tmp->new_cpus)) { if (is_in_v2_mode() && cpumask_empty(tmp->new_cpus)) {
if (is_partition_valid(cp) &&
cpumask_equal(cp->cpus_allowed, cp->subparts_cpus))
goto update_parent_subparts;
cpumask_copy(tmp->new_cpus, parent->effective_cpus); cpumask_copy(tmp->new_cpus, parent->effective_cpus);
if (!cp->use_parent_ecpus) { if (!cp->use_parent_ecpus) {
cp->use_parent_ecpus = true; cp->use_parent_ecpus = true;
...@@ -1671,12 +1823,12 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, ...@@ -1671,12 +1823,12 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
continue; continue;
} }
update_parent_subparts: update_parent_effective:
/* /*
* update_parent_subparts_cpumask() should have been called * update_parent_effective_cpumask() should have been called
* for cs already in update_cpumask(). We should also call * for cs already in update_cpumask(). We should also call
* update_tasks_cpumask() again for tasks in the parent * update_tasks_cpumask() again for tasks in the parent
* cpuset if the parent's subparts_cpus changes. * cpuset if the parent's effective_cpus changes.
*/ */
old_prs = new_prs = cp->partition_root_state; old_prs = new_prs = cp->partition_root_state;
if ((cp != cs) && old_prs) { if ((cp != cs) && old_prs) {
...@@ -1706,8 +1858,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, ...@@ -1706,8 +1858,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
rcu_read_unlock(); rcu_read_unlock();
if (update_parent) { if (update_parent) {
update_parent_subparts_cpumask(cp, partcmd_update, NULL, update_parent_effective_cpumask(cp, partcmd_update, NULL, tmp);
tmp);
/* /*
* The cpuset partition_root_state may become * The cpuset partition_root_state may become
* invalid. Capture it. * invalid. Capture it.
...@@ -1716,30 +1867,18 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, ...@@ -1716,30 +1867,18 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
} }
spin_lock_irq(&callback_lock); spin_lock_irq(&callback_lock);
if (cp->nr_subparts_cpus && !is_partition_valid(cp)) {
/*
* Put all active subparts_cpus back to effective_cpus.
*/
cpumask_or(tmp->new_cpus, tmp->new_cpus,
cp->subparts_cpus);
cpumask_and(tmp->new_cpus, tmp->new_cpus,
cpu_active_mask);
cp->nr_subparts_cpus = 0;
cpumask_clear(cp->subparts_cpus);
}
cpumask_copy(cp->effective_cpus, tmp->new_cpus); cpumask_copy(cp->effective_cpus, tmp->new_cpus);
if (cp->nr_subparts_cpus) {
/*
* Make sure that effective_cpus & subparts_cpus
* are mutually exclusive.
*/
cpumask_andnot(cp->effective_cpus, cp->effective_cpus,
cp->subparts_cpus);
}
cp->partition_root_state = new_prs; cp->partition_root_state = new_prs;
if ((new_prs > 0) && cpumask_empty(cp->effective_xcpus))
cpumask_and(cp->effective_xcpus,
cp->cpus_allowed, parent->effective_xcpus);
if (new_prs < 0) {
/* Reset partition data */
cp->nr_subparts = 0;
cpumask_clear(cp->effective_xcpus);
if (is_cpu_exclusive(cp))
clear_bit(CS_CPU_EXCLUSIVE, &cp->flags);
}
spin_unlock_irq(&callback_lock); spin_unlock_irq(&callback_lock);
notify_partition_change(cp, old_prs); notify_partition_change(cp, old_prs);
...@@ -1836,6 +1975,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, ...@@ -1836,6 +1975,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
{ {
int retval; int retval;
struct tmpmasks tmp; struct tmpmasks tmp;
struct cpuset *parent = parent_cs(cs);
bool invalidate = false; bool invalidate = false;
int old_prs = cs->partition_root_state; int old_prs = cs->partition_root_state;
...@@ -1851,6 +1991,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, ...@@ -1851,6 +1991,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
*/ */
if (!*buf) { if (!*buf) {
cpumask_clear(trialcs->cpus_allowed); cpumask_clear(trialcs->cpus_allowed);
cpumask_clear(trialcs->effective_xcpus);
} else { } else {
retval = cpulist_parse(buf, trialcs->cpus_allowed); retval = cpulist_parse(buf, trialcs->cpus_allowed);
if (retval < 0) if (retval < 0)
...@@ -1859,6 +2000,13 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, ...@@ -1859,6 +2000,13 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
if (!cpumask_subset(trialcs->cpus_allowed, if (!cpumask_subset(trialcs->cpus_allowed,
top_cpuset.cpus_allowed)) top_cpuset.cpus_allowed))
return -EINVAL; return -EINVAL;
/*
* When effective_xcpus is set, make sure it is a subset of
* cpus_allowed and parent's effective_xcpus.
*/
cpumask_and(trialcs->effective_xcpus,
parent->effective_xcpus, trialcs->cpus_allowed);
} }
/* Nothing to do if the cpus didn't change */ /* Nothing to do if the cpus didn't change */
...@@ -1868,11 +2016,21 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, ...@@ -1868,11 +2016,21 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
if (alloc_cpumasks(NULL, &tmp)) if (alloc_cpumasks(NULL, &tmp))
return -ENOMEM; return -ENOMEM;
if (is_partition_valid(cs)) {
if (cpumask_empty(trialcs->effective_xcpus)) {
invalidate = true;
cs->prs_err = PERR_INVCPUS;
} else if (tasks_nocpu_error(parent, cs, trialcs->effective_xcpus)) {
invalidate = true;
cs->prs_err = PERR_NOCPUS;
}
}
retval = validate_change(cs, trialcs); retval = validate_change(cs, trialcs);
if ((retval == -EINVAL) && cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) { if ((retval == -EINVAL) && cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) {
struct cpuset *cp, *parent;
struct cgroup_subsys_state *css; struct cgroup_subsys_state *css;
struct cpuset *cp;
/* /*
* The -EINVAL error code indicates that partition sibling * The -EINVAL error code indicates that partition sibling
...@@ -1883,69 +2041,44 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, ...@@ -1883,69 +2041,44 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
*/ */
invalidate = true; invalidate = true;
rcu_read_lock(); rcu_read_lock();
parent = parent_cs(cs);
cpuset_for_each_child(cp, css, parent) cpuset_for_each_child(cp, css, parent)
if (is_partition_valid(cp) && if (is_partition_valid(cp) &&
cpumask_intersects(trialcs->cpus_allowed, cp->cpus_allowed)) { cpumask_intersects(trialcs->effective_xcpus, cp->effective_xcpus)) {
rcu_read_unlock(); rcu_read_unlock();
update_parent_subparts_cpumask(cp, partcmd_invalidate, NULL, &tmp); update_parent_effective_cpumask(cp, partcmd_invalidate, NULL, &tmp);
rcu_read_lock(); rcu_read_lock();
} }
rcu_read_unlock(); rcu_read_unlock();
retval = 0; retval = 0;
} }
if (retval < 0) if (retval < 0)
goto out_free; goto out_free;
if (cs->partition_root_state) { if (cs->partition_root_state) {
if (invalidate) if (invalidate)
update_parent_subparts_cpumask(cs, partcmd_invalidate, update_parent_effective_cpumask(cs, partcmd_invalidate,
NULL, &tmp); NULL, &tmp);
else else
update_parent_subparts_cpumask(cs, partcmd_update, update_parent_effective_cpumask(cs, partcmd_update,
trialcs->cpus_allowed, &tmp); trialcs->effective_xcpus, &tmp);
} }
compute_effective_cpumask(trialcs->effective_cpus, trialcs,
parent_cs(cs));
spin_lock_irq(&callback_lock); spin_lock_irq(&callback_lock);
cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
if (!is_partition_valid(cs))
cpumask_clear(cs->effective_xcpus);
else
cpumask_copy(cs->effective_xcpus, trialcs->effective_xcpus);
/*
* Make sure that subparts_cpus, if not empty, is a subset of
* cpus_allowed. Clear subparts_cpus if partition not valid or
* empty effective cpus with tasks.
*/
if (cs->nr_subparts_cpus) {
if (!is_partition_valid(cs) ||
(cpumask_subset(trialcs->effective_cpus, cs->subparts_cpus) &&
partition_is_populated(cs, NULL))) {
cs->nr_subparts_cpus = 0;
cpumask_clear(cs->subparts_cpus);
} else {
cpumask_and(cs->subparts_cpus, cs->subparts_cpus,
cs->cpus_allowed);
cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus);
}
}
spin_unlock_irq(&callback_lock); spin_unlock_irq(&callback_lock);
/* effective_cpus will be updated here */ /* effective_cpus will be updated here */
update_cpumasks_hier(cs, &tmp, 0); update_cpumasks_hier(cs, &tmp, 0);
if (cs->partition_root_state) { /* Update CS_SCHED_LOAD_BALANCE and/or sched_domains, if necessary */
struct cpuset *parent = parent_cs(cs); if (cs->partition_root_state)
/*
* For partition root, update the cpumasks of sibling
* cpusets if they use parent's effective_cpus.
*/
if (parent->child_ecpus_count)
update_sibling_cpumasks(parent, cs, &tmp);
/* Update CS_SCHED_LOAD_BALANCE and/or sched_domains */
update_partition_sd_lb(cs, old_prs); update_partition_sd_lb(cs, old_prs);
}
out_free: out_free:
free_cpumasks(NULL, &tmp); free_cpumasks(NULL, &tmp);
return 0; return 0;
...@@ -2323,7 +2456,6 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, ...@@ -2323,7 +2456,6 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
static int update_prstate(struct cpuset *cs, int new_prs) static int update_prstate(struct cpuset *cs, int new_prs)
{ {
int err = PERR_NONE, old_prs = cs->partition_root_state; int err = PERR_NONE, old_prs = cs->partition_root_state;
struct cpuset *parent = parent_cs(cs);
struct tmpmasks tmpmask; struct tmpmasks tmpmask;
if (old_prs == new_prs) if (old_prs == new_prs)
...@@ -2341,6 +2473,19 @@ static int update_prstate(struct cpuset *cs, int new_prs) ...@@ -2341,6 +2473,19 @@ static int update_prstate(struct cpuset *cs, int new_prs)
if (alloc_cpumasks(NULL, &tmpmask)) if (alloc_cpumasks(NULL, &tmpmask))
return -ENOMEM; return -ENOMEM;
/*
* Setup effective_xcpus if not set yet, it will be cleared later
* if partition becomes invalid.
*/
if ((new_prs > 0) && cpumask_empty(cs->effective_xcpus)) {
struct cpuset *parent = parent_cs(cs);
spin_lock_irq(&callback_lock);
cpumask_and(cs->effective_xcpus,
cs->cpus_allowed, parent->effective_xcpus);
spin_unlock_irq(&callback_lock);
}
err = update_partition_exclusive(cs, new_prs); err = update_partition_exclusive(cs, new_prs);
if (err) if (err)
goto out; goto out;
...@@ -2354,7 +2499,7 @@ static int update_prstate(struct cpuset *cs, int new_prs) ...@@ -2354,7 +2499,7 @@ static int update_prstate(struct cpuset *cs, int new_prs)
goto out; goto out;
} }
err = update_parent_subparts_cpumask(cs, partcmd_enable, err = update_parent_effective_cpumask(cs, partcmd_enable,
NULL, &tmpmask); NULL, &tmpmask);
} else if (old_prs && new_prs) { } else if (old_prs && new_prs) {
/* /*
...@@ -2366,19 +2511,13 @@ static int update_prstate(struct cpuset *cs, int new_prs) ...@@ -2366,19 +2511,13 @@ static int update_prstate(struct cpuset *cs, int new_prs)
* Switching back to member is always allowed even if it * Switching back to member is always allowed even if it
* disables child partitions. * disables child partitions.
*/ */
update_parent_subparts_cpumask(cs, partcmd_disable, NULL, update_parent_effective_cpumask(cs, partcmd_disable, NULL,
&tmpmask); &tmpmask);
/* /*
* If there are child partitions, they will all become invalid. * Invalidation of child partitions will be done in
* update_cpumasks_hier().
*/ */
if (unlikely(cs->nr_subparts_cpus)) {
spin_lock_irq(&callback_lock);
cs->nr_subparts_cpus = 0;
cpumask_clear(cs->subparts_cpus);
compute_effective_cpumask(cs->effective_cpus, cs, parent);
spin_unlock_irq(&callback_lock);
}
} }
out: out:
/* /*
...@@ -2393,13 +2532,11 @@ static int update_prstate(struct cpuset *cs, int new_prs) ...@@ -2393,13 +2532,11 @@ static int update_prstate(struct cpuset *cs, int new_prs)
spin_lock_irq(&callback_lock); spin_lock_irq(&callback_lock);
cs->partition_root_state = new_prs; cs->partition_root_state = new_prs;
WRITE_ONCE(cs->prs_err, err); WRITE_ONCE(cs->prs_err, err);
if (!is_partition_valid(cs))
cpumask_clear(cs->effective_xcpus);
spin_unlock_irq(&callback_lock); spin_unlock_irq(&callback_lock);
/* /* Force update if switching back to member */
* Update child cpusets, if present.
* Force update if switching back to member.
*/
if (!list_empty(&cs->css.children))
update_cpumasks_hier(cs, &tmpmask, !new_prs ? HIER_CHECKALL : 0); update_cpumasks_hier(cs, &tmpmask, !new_prs ? HIER_CHECKALL : 0);
/* Update sched domains and load balance flag */ /* Update sched domains and load balance flag */
...@@ -2649,7 +2786,7 @@ static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task) ...@@ -2649,7 +2786,7 @@ static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
guarantee_online_cpus(task, cpus_attach); guarantee_online_cpus(task, cpus_attach);
else else
cpumask_andnot(cpus_attach, task_cpu_possible_mask(task), cpumask_andnot(cpus_attach, task_cpu_possible_mask(task),
cs->subparts_cpus); subpartitions_cpus);
/* /*
* can_attach beforehand should guarantee that this doesn't * can_attach beforehand should guarantee that this doesn't
* fail. TODO: have a better way to handle failure here * fail. TODO: have a better way to handle failure here
...@@ -2752,6 +2889,7 @@ typedef enum { ...@@ -2752,6 +2889,7 @@ typedef enum {
FILE_EFFECTIVE_CPULIST, FILE_EFFECTIVE_CPULIST,
FILE_EFFECTIVE_MEMLIST, FILE_EFFECTIVE_MEMLIST,
FILE_SUBPARTS_CPULIST, FILE_SUBPARTS_CPULIST,
FILE_EFFECTIVE_XCPULIST,
FILE_CPU_EXCLUSIVE, FILE_CPU_EXCLUSIVE,
FILE_MEM_EXCLUSIVE, FILE_MEM_EXCLUSIVE,
FILE_MEM_HARDWALL, FILE_MEM_HARDWALL,
...@@ -2936,8 +3074,11 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) ...@@ -2936,8 +3074,11 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
case FILE_EFFECTIVE_MEMLIST: case FILE_EFFECTIVE_MEMLIST:
seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems)); seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems));
break; break;
case FILE_EFFECTIVE_XCPULIST:
seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_xcpus));
break;
case FILE_SUBPARTS_CPULIST: case FILE_SUBPARTS_CPULIST:
seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->subparts_cpus)); seq_printf(sf, "%*pbl\n", cpumask_pr_args(subpartitions_cpus));
break; break;
default: default:
ret = -EINVAL; ret = -EINVAL;
...@@ -3209,11 +3350,18 @@ static struct cftype dfl_files[] = { ...@@ -3209,11 +3350,18 @@ static struct cftype dfl_files[] = {
.file_offset = offsetof(struct cpuset, partition_file), .file_offset = offsetof(struct cpuset, partition_file),
}, },
{
.name = "cpus.exclusive.effective",
.seq_show = cpuset_common_seq_show,
.private = FILE_EFFECTIVE_XCPULIST,
.flags = CFTYPE_NOT_ON_ROOT,
},
{ {
.name = "cpus.subpartitions", .name = "cpus.subpartitions",
.seq_show = cpuset_common_seq_show, .seq_show = cpuset_common_seq_show,
.private = FILE_SUBPARTS_CPULIST, .private = FILE_SUBPARTS_CPULIST,
.flags = CFTYPE_DEBUG, .flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_DEBUG,
}, },
{ } /* terminate */ { } /* terminate */
...@@ -3387,6 +3535,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) ...@@ -3387,6 +3535,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
if (is_in_v2_mode()) { if (is_in_v2_mode()) {
cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
cpumask_copy(top_cpuset.effective_xcpus, cpu_possible_mask);
top_cpuset.mems_allowed = node_possible_map; top_cpuset.mems_allowed = node_possible_map;
} else { } else {
cpumask_copy(top_cpuset.cpus_allowed, cpumask_copy(top_cpuset.cpus_allowed,
...@@ -3525,11 +3674,13 @@ int __init cpuset_init(void) ...@@ -3525,11 +3674,13 @@ int __init cpuset_init(void)
{ {
BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
BUG_ON(!zalloc_cpumask_var(&top_cpuset.subparts_cpus, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_xcpus, GFP_KERNEL));
BUG_ON(!zalloc_cpumask_var(&subpartitions_cpus, GFP_KERNEL));
cpumask_setall(top_cpuset.cpus_allowed); cpumask_setall(top_cpuset.cpus_allowed);
nodes_setall(top_cpuset.mems_allowed); nodes_setall(top_cpuset.mems_allowed);
cpumask_setall(top_cpuset.effective_cpus); cpumask_setall(top_cpuset.effective_cpus);
cpumask_setall(top_cpuset.effective_xcpus);
nodes_setall(top_cpuset.effective_mems); nodes_setall(top_cpuset.effective_mems);
fmeter_init(&top_cpuset.fmeter); fmeter_init(&top_cpuset.fmeter);
...@@ -3669,30 +3820,15 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) ...@@ -3669,30 +3820,15 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
compute_effective_cpumask(&new_cpus, cs, parent); compute_effective_cpumask(&new_cpus, cs, parent);
nodes_and(new_mems, cs->mems_allowed, parent->effective_mems); nodes_and(new_mems, cs->mems_allowed, parent->effective_mems);
if (cs->nr_subparts_cpus)
/*
* Make sure that CPUs allocated to child partitions
* do not show up in effective_cpus.
*/
cpumask_andnot(&new_cpus, &new_cpus, cs->subparts_cpus);
if (!tmp || !cs->partition_root_state) if (!tmp || !cs->partition_root_state)
goto update_tasks; goto update_tasks;
/* /*
* In the unlikely event that a partition root has empty * Compute effective_cpus for valid partition root, may invalidate
* effective_cpus with tasks, we will have to invalidate child * child partition roots if necessary.
* partitions, if present, by setting nr_subparts_cpus to 0 to
* reclaim their cpus.
*/ */
if (cs->nr_subparts_cpus && is_partition_valid(cs) && if (is_partition_valid(cs) && is_partition_valid(parent))
cpumask_empty(&new_cpus) && partition_is_populated(cs, NULL)) { compute_partition_effective_cpumask(cs, &new_cpus);
spin_lock_irq(&callback_lock);
cs->nr_subparts_cpus = 0;
cpumask_clear(cs->subparts_cpus);
spin_unlock_irq(&callback_lock);
compute_effective_cpumask(&new_cpus, cs, parent);
}
/* /*
* Force the partition to become invalid if either one of * Force the partition to become invalid if either one of
...@@ -3701,45 +3837,23 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) ...@@ -3701,45 +3837,23 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
* 2) parent is invalid or doesn't grant any cpus to child * 2) parent is invalid or doesn't grant any cpus to child
* partitions. * partitions.
*/ */
if (is_partition_valid(cs) && (!parent->nr_subparts_cpus || if (is_partition_valid(cs) && (!is_partition_valid(parent) ||
(cpumask_empty(&new_cpus) && partition_is_populated(cs, NULL)))) { tasks_nocpu_error(parent, cs, &new_cpus))) {
int old_prs, parent_prs; update_parent_effective_cpumask(cs, partcmd_invalidate, NULL, tmp);
update_parent_subparts_cpumask(cs, partcmd_disable, NULL, tmp);
if (cs->nr_subparts_cpus) {
spin_lock_irq(&callback_lock);
cs->nr_subparts_cpus = 0;
cpumask_clear(cs->subparts_cpus);
spin_unlock_irq(&callback_lock);
compute_effective_cpumask(&new_cpus, cs, parent); compute_effective_cpumask(&new_cpus, cs, parent);
}
old_prs = cs->partition_root_state;
parent_prs = parent->partition_root_state;
if (is_partition_valid(cs)) {
spin_lock_irq(&callback_lock);
make_partition_invalid(cs);
spin_unlock_irq(&callback_lock);
if (is_prs_invalid(parent_prs))
WRITE_ONCE(cs->prs_err, PERR_INVPARENT);
else if (!parent_prs)
WRITE_ONCE(cs->prs_err, PERR_NOTPART);
else
WRITE_ONCE(cs->prs_err, PERR_HOTPLUG);
notify_partition_change(cs, old_prs);
}
cpuset_force_rebuild(); cpuset_force_rebuild();
} }
/* /*
* On the other hand, an invalid partition root may be transitioned * On the other hand, an invalid partition root may be transitioned
* back to a regular one. * back to a regular one.
*/ */
else if (is_partition_valid(parent) && is_partition_invalid(cs)) { else if (is_partition_valid(parent) && is_partition_invalid(cs)) {
update_parent_subparts_cpumask(cs, partcmd_update, NULL, tmp); update_parent_effective_cpumask(cs, partcmd_update, NULL, tmp);
if (is_partition_valid(cs)) if (is_partition_valid(cs)) {
compute_partition_effective_cpumask(cs, &new_cpus);
cpuset_force_rebuild(); cpuset_force_rebuild();
} }
}
update_tasks: update_tasks:
cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus); cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
...@@ -3796,21 +3910,22 @@ static void cpuset_hotplug_workfn(struct work_struct *work) ...@@ -3796,21 +3910,22 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
new_mems = node_states[N_MEMORY]; new_mems = node_states[N_MEMORY];
/* /*
* If subparts_cpus is populated, it is likely that the check below * If subpartitions_cpus is populated, it is likely that the check
* will produce a false positive on cpus_updated when the cpu list * below will produce a false positive on cpus_updated when the cpu
* isn't changed. It is extra work, but it is better to be safe. * list isn't changed. It is extra work, but it is better to be safe.
*/ */
cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus); cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus) ||
!cpumask_empty(subpartitions_cpus);
mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems); mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems);
/* /*
* In the rare case that hotplug removes all the cpus in subparts_cpus, * In the rare case that hotplug removes all the cpus in
* we assumed that cpus are updated. * subpartitions_cpus, we assumed that cpus are updated.
*/ */
if (!cpus_updated && top_cpuset.nr_subparts_cpus) if (!cpus_updated && top_cpuset.nr_subparts)
cpus_updated = true; cpus_updated = true;
/* synchronize cpus_allowed to cpu_active_mask */ /* For v1, synchronize cpus_allowed to cpu_active_mask */
if (cpus_updated) { if (cpus_updated) {
spin_lock_irq(&callback_lock); spin_lock_irq(&callback_lock);
if (!on_dfl) if (!on_dfl)
...@@ -3818,17 +3933,16 @@ static void cpuset_hotplug_workfn(struct work_struct *work) ...@@ -3818,17 +3933,16 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
/* /*
* Make sure that CPUs allocated to child partitions * Make sure that CPUs allocated to child partitions
* do not show up in effective_cpus. If no CPU is left, * do not show up in effective_cpus. If no CPU is left,
* we clear the subparts_cpus & let the child partitions * we clear the subpartitions_cpus & let the child partitions
* fight for the CPUs again. * fight for the CPUs again.
*/ */
if (top_cpuset.nr_subparts_cpus) { if (!cpumask_empty(subpartitions_cpus)) {
if (cpumask_subset(&new_cpus, if (cpumask_subset(&new_cpus, subpartitions_cpus)) {
top_cpuset.subparts_cpus)) { top_cpuset.nr_subparts = 0;
top_cpuset.nr_subparts_cpus = 0; cpumask_clear(subpartitions_cpus);
cpumask_clear(top_cpuset.subparts_cpus);
} else { } else {
cpumask_andnot(&new_cpus, &new_cpus, cpumask_andnot(&new_cpus, &new_cpus,
top_cpuset.subparts_cpus); subpartitions_cpus);
} }
} }
cpumask_copy(top_cpuset.effective_cpus, &new_cpus); cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
...@@ -3960,7 +4074,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) ...@@ -3960,7 +4074,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
* We first exclude cpus allocated to partitions. If there is no * We first exclude cpus allocated to partitions. If there is no
* allowable online cpu left, we fall back to all possible cpus. * allowable online cpu left, we fall back to all possible cpus.
*/ */
cpumask_andnot(pmask, possible_mask, top_cpuset.subparts_cpus); cpumask_andnot(pmask, possible_mask, subpartitions_cpus);
if (!cpumask_intersects(pmask, cpu_online_mask)) if (!cpumask_intersects(pmask, cpu_online_mask))
cpumask_copy(pmask, possible_mask); cpumask_copy(pmask, possible_mask);
} }
......