Commit 8dde191a authored by Linus Torvalds

Merge tag 'sched-urgent-2024-05-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:

 - Fix a sched_balance_newidle setting bug

 - Fix bug in the setting of /sys/fs/cgroup/test/cpu.max.burst

 - Fix variable-shadowing build warning

 - Extend sched-domains debug output

 - Fix documentation

 - Fix comments

* tag 'sched-urgent-2024-05-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/core: Fix incorrect initialization of the 'burst' parameter in cpu_max_write()
  sched/fair: Remove stale FREQUENCY_UTIL comment
  sched/fair: Fix initial util_avg calculation
  docs: cgroup-v1: Clarify that domain levels are system-specific
  sched/debug: Dump domains' level
  sched/fair: Allow disabling sched_balance_newidle with sched_relax_domain_level
  arch/topology: Fix variable naming to avoid shadowing
parents fe0d43f2 49217ea1
@@ -568,7 +568,7 @@ on the next tick. For some applications in special situation, waiting
 The 'cpuset.sched_relax_domain_level' file allows you to request changing
 this searching range as you like. This file takes int value which
-indicates size of searching range in levels ideally as follows,
+indicates size of searching range in levels approximately as follows,
 otherwise initial value -1 that indicates the cpuset has no request.

 ====== ===========================================================
@@ -581,6 +581,11 @@ otherwise initial value -1 that indicates the cpuset has no request.
   5    search system wide [on NUMA system]
 ====== ===========================================================

+Not all levels can be present and values can change depending on the
+system architecture and kernel configuration. Check
+/sys/kernel/debug/sched/domains/cpu*/domain*/ for system-specific
+details.
+
 The system default is architecture dependent. The system default
 can be changed using the relax_domain_level= boot parameter.
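For reference, this per-cpuset knob is set through the cgroup-v1 'cpuset.sched_relax_domain_level' file mentioned above. A minimal userspace sketch, not part of this patch set, that requests level 1 (search only siblings in the same core) for a hypothetical cpuset named "test"; the cgroup-v1 cpuset mount point varies by system, a mount at /sys/fs/cgroup/cpuset is assumed here:

/*
 * Hypothetical usage sketch: write a relax-domain level of 1 into an
 * existing cpuset named "test".  Adjust the mount point and cpuset
 * name for your system.
 */
#include <stdio.h>

int main(void)
{
        const char *path =
                "/sys/fs/cgroup/cpuset/test/cpuset.sched_relax_domain_level";
        FILE *f = fopen(path, "w");

        if (!f) {
                perror(path);
                return 1;
        }
        fprintf(f, "1\n");
        return fclose(f) ? 1 : 0;
}
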
@@ -179,7 +179,7 @@ DEFINE_PER_CPU(unsigned long, hw_pressure);
 void topology_update_hw_pressure(const struct cpumask *cpus,
                                  unsigned long capped_freq)
 {
-        unsigned long max_capacity, capacity, hw_pressure;
+        unsigned long max_capacity, capacity, pressure;
         u32 max_freq;
         int cpu;
@@ -196,12 +196,12 @@ void topology_update_hw_pressure(const struct cpumask *cpus,
         else
                 capacity = mult_frac(max_capacity, capped_freq, max_freq);
-        hw_pressure = max_capacity - capacity;
+        pressure = max_capacity - capacity;
-        trace_hw_pressure_update(cpu, hw_pressure);
+        trace_hw_pressure_update(cpu, pressure);
         for_each_cpu(cpu, cpus)
-                WRITE_ONCE(per_cpu(hw_pressure, cpu), hw_pressure);
+                WRITE_ONCE(per_cpu(hw_pressure, cpu), pressure);
 }
 EXPORT_SYMBOL_GPL(topology_update_hw_pressure);
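This hunk is purely a rename: the local variable shadowed the per-CPU 'hw_pressure' variable declared by DEFINE_PER_CPU() just above the function, which is what produced the variable-shadowing build warning. A standalone sketch of the same pattern, with made-up names, of the kind of shadowing that gcc -Wshadow flags:

/* Minimal sketch of the shadowing pattern, with made-up names. */
#include <stdio.h>

static unsigned long hw_pressure;      /* file-scope variable (per-CPU in the kernel) */

static void update(unsigned long max_capacity, unsigned long capacity)
{
        /* This local shadows the file-scope hw_pressure: gcc -Wshadow warns here. */
        unsigned long hw_pressure = max_capacity - capacity;

        printf("local pressure: %lu\n", hw_pressure);
}

int main(void)
{
        update(1024, 768);
        printf("global hw_pressure is still %lu\n", hw_pressure);      /* 0: never written */
        return 0;
}

Renaming the local, as the patch does, silences the warning without changing behaviour.
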
@@ -2941,7 +2941,7 @@ bool current_cpuset_is_being_rebound(void)
 static int update_relax_domain_level(struct cpuset *cs, s64 val)
 {
 #ifdef CONFIG_SMP
-        if (val < -1 || val >= sched_domain_level_max)
+        if (val < -1 || val > sched_domain_level_max + 1)
                 return -EINVAL;
 #endif
@@ -11401,7 +11401,7 @@ static ssize_t cpu_max_write(struct kernfs_open_file *of,
 {
         struct task_group *tg = css_tg(of_css(of));
         u64 period = tg_get_cfs_period(tg);
-        u64 burst = tg_get_cfs_burst(tg);
+        u64 burst = tg->cfs_bandwidth.burst;
         u64 quota;
         int ret;
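The cpu_max_write() change appears to fix a unit mismatch: tg_get_cfs_burst() returns the burst converted to microseconds, while the value passed on from here is treated as nanoseconds, so each write to cpu.max would shrink the stored cpu.max.burst by a factor of NSEC_PER_USEC. Reading the raw nanosecond value from tg->cfs_bandwidth.burst keeps the units consistent. A self-contained sketch of the arithmetic, with made-up names rather than the kernel API:

/*
 * Standalone sketch (made-up names) of the unit mismatch: the stored
 * value is in nanoseconds, the getter converts to microseconds, and the
 * setter treats its argument as nanoseconds again, so every round trip
 * shrinks the value by a factor of 1000.
 */
#include <stdio.h>

#define NSEC_PER_USEC 1000ULL

static unsigned long long burst_ns = 1000000ULL * NSEC_PER_USEC;       /* 1s burst */

static unsigned long long get_burst_us(void)    { return burst_ns / NSEC_PER_USEC; }
static void set_burst_ns(unsigned long long ns) { burst_ns = ns; }

int main(void)
{
        /* Buggy pattern: feed the usec getter straight into the nsec setter. */
        set_burst_ns(get_burst_us());
        printf("after buggy round trip: %llu us\n", get_burst_us());   /* 1000 us */

        /* Fixed pattern: keep the raw nanosecond value. */
        burst_ns = 1000000ULL * NSEC_PER_USEC;
        set_burst_ns(burst_ns);
        printf("after fixed round trip: %llu us\n", get_burst_us());   /* 1000000 us */
        return 0;
}
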
@@ -425,6 +425,7 @@ static void register_sd(struct sched_domain *sd, struct dentry *parent)
         debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
         debugfs_create_file("groups_flags", 0444, parent, &sd->groups->flags, &sd_flags_fops);
+        debugfs_create_u32("level", 0444, parent, (u32 *)&sd->level);
 }
 void update_sched_domain_debugfs(void)
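The new read-only 'level' attribute sits next to the existing per-domain debugfs files, so the numeric levels referenced by sched_relax_domain_level can be looked up per CPU, as the documentation hunk above suggests. A small sketch that lists cpu0's domain levels, assuming debugfs is mounted at /sys/kernel/debug and the kernel carries this patch:

/*
 * Hedged sketch, not part of the patch: walk cpu0's sched domains and
 * print the new "level" attribute.
 */
#include <stdio.h>

int main(void)
{
        for (int d = 0; ; d++) {
                char path[96];
                long level;
                FILE *f;

                snprintf(path, sizeof(path),
                         "/sys/kernel/debug/sched/domains/cpu0/domain%d/level", d);
                f = fopen(path, "r");
                if (!f)
                        break;          /* no more domains */
                if (fscanf(f, "%ld", &level) == 1)
                        printf("cpu0/domain%d: level %ld\n", d, level);
                fclose(f);
        }
        return 0;
}
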
@@ -1030,7 +1030,8 @@ void init_entity_runnable_average(struct sched_entity *se)
  * With new tasks being created, their initial util_avgs are extrapolated
  * based on the cfs_rq's current util_avg:
  *
- *   util_avg = cfs_rq->util_avg / (cfs_rq->load_avg + 1) * se.load.weight
+ *   util_avg = cfs_rq->avg.util_avg / (cfs_rq->avg.load_avg + 1)
+ *              * se_weight(se)
  *
  * However, in many cases, the above util_avg does not give a desired
  * value. Moreover, the sum of the util_avgs may be divergent, such
@@ -1077,7 +1078,7 @@ void post_init_entity_util_avg(struct task_struct *p)
         if (cap > 0) {
                 if (cfs_rq->avg.util_avg != 0) {
-                        sa->util_avg  = cfs_rq->avg.util_avg * se->load.weight;
+                        sa->util_avg  = cfs_rq->avg.util_avg * se_weight(se);
                         sa->util_avg /= (cfs_rq->avg.load_avg + 1);
                         if (sa->util_avg > cap)
@@ -7898,8 +7899,8 @@ eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
                  * Performance domain frequency: utilization clamping
                  * must be considered since it affects the selection
                  * of the performance domain frequency.
-                 * NOTE: in case RT tasks are running, by default the
-                 * FREQUENCY_UTIL's utilization can be max OPP.
+                 * NOTE: in case RT tasks are running, by default the min
+                 * utilization can be max OPP.
                  */
                 eff_util = effective_cpu_util(cpu, util, &min, &max);
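In the two util_avg hunks, se->load.weight is replaced by se_weight(se). As I read the change: on 64-bit kernels load weights are stored scaled up by SCHED_FIXEDPOINT_SHIFT (10 bits), so using the raw weight made a new task's initial util_avg roughly 1024 times larger than the proportional share the comment's formula intends, leaving it to be clamped almost always; se_weight() returns the unscaled weight. A standalone sketch of the arithmetic, with made-up names and 64-bit constants:

/*
 * Sketch of the fix's arithmetic: the initial util_avg of a new task is
 * the cfs_rq's util_avg split in proportion to the entity's weight.
 * Using the fixed-point-scaled weight instead of the plain weight
 * inflates the result by 2^SCHED_FIXEDPOINT_SHIFT (1024).
 */
#include <stdio.h>

#define SCHED_FIXEDPOINT_SHIFT  10
#define NICE_0_WEIGHT           1024UL                                  /* se_weight() for nice 0 */
#define NICE_0_LOAD             (NICE_0_WEIGHT << SCHED_FIXEDPOINT_SHIFT) /* se->load.weight */

static unsigned long initial_util_avg(unsigned long cfs_util_avg,
                                      unsigned long cfs_load_avg,
                                      unsigned long weight)
{
        return cfs_util_avg * weight / (cfs_load_avg + 1);
}

int main(void)
{
        /* A runqueue with one nice-0 task: util_avg 300, load_avg 1024. */
        unsigned long buggy = initial_util_avg(300, 1024, NICE_0_LOAD);
        unsigned long fixed = initial_util_avg(300, 1024, NICE_0_WEIGHT);

        printf("scaled weight   -> util_avg %lu (far above the cap)\n", buggy);
        printf("unscaled weight -> util_avg %lu\n", fixed);
        return 0;
}

The third hunk only drops a stale reference to FREQUENCY_UTIL from a comment; no code changes there.
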
@@ -1474,7 +1474,7 @@ static void set_domain_attribute(struct sched_domain *sd,
         } else
                 request = attr->relax_domain_level;
-        if (sd->level > request) {
+        if (sd->level >= request) {
                 /* Turn off idle balance on this domain: */
                 sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
         }
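With the comparison relaxed from '>' to '>=', a request of level N now clears SD_BALANCE_WAKE and SD_BALANCE_NEWIDLE on the level-N domain as well, so a request of 0 disables newidle balancing on every domain; and, reading it together with the widened range check in the cpuset.c hunk above, a request of sched_domain_level_max + 1 matches no domain and leaves balancing enabled everywhere. A toy sketch of the predicate, not kernel code, assuming three domain levels (0=SMT, 1=MC, 2=PKG):

/*
 * Toy sketch: which domain levels lose newidle/wake balancing for a
 * given relax_domain_level request after the '>' -> '>=' change.
 */
#include <stdio.h>

int main(void)
{
        int max_level = 2;

        for (int request = 0; request <= max_level + 1; request++) {
                printf("request %d:", request);
                for (int level = 0; level <= max_level; level++)
                        printf("  level %d -> %s", level,
                               level >= request ? "newidle off" : "newidle on");
                printf("\n");
        }
        return 0;
}
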