Merge tag 'pm-5.12-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull more power management updates from Rafael Wysocki: "These are fixes and cleanups on top of the power management material for 5.12-rc1 merged previously. Specifics: - Address cpufreq regression introduced in 5.11 that causes CPU frequency reporting to be distorted on systems with CPPC that use acpi-cpufreq as the scaling driver (Rafael Wysocki). - Fix regression introduced during the 5.10 development cycle related to CPU hotplug and policy recreation in the qcom-cpufreq-hw driver (Shawn Guo). - Fix recent regression in the operating performance points (OPP) framework that may cause frequency updates to be skipped by mistake in some cases (Jonathan Marek). - Simplify schedutil governor code and remove a misleading comment from it (Yue Hu). - Fix kerneldoc comment typo in the cpufreq core (Yue Hu)" * tag 'pm-5.12-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: cpufreq: Fix typo in kerneldoc comment cpufreq: schedutil: Remove update_lock comment from struct sugov_policy definition cpufreq: schedutil: Remove needless sg_policy parameter from ignore_dl_rate_limit() cpufreq: ACPI: Set cpuinfo.max_freq directly if max boost is known cpufreq: qcom-hw: drop devm_xxx() calls from init/exit hooks opp: Don't skip freq update for different frequency

Merge tag 'pm-5.12-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull more power management updates from Rafael Wysocki: "These are fixes and cleanups on top of the power management material for 5.12-rc1 merged previously. Specifics: - Address cpufreq regression introduced in 5.11 that causes CPU frequency reporting to be distorted on systems with CPPC that use acpi-cpufreq as the scaling driver (Rafael Wysocki). - Fix regression introduced during the 5.10 development cycle related to CPU hotplug and policy recreation in the qcom-cpufreq-hw driver (Shawn Guo). - Fix recent regression in the operating performance points (OPP) framework that may cause frequency updates to be skipped by mistake in some cases (Jonathan Marek). - Simplify schedutil governor code and remove a misleading comment from it (Yue Hu). - Fix kerneldoc comment typo in the cpufreq core (Yue Hu)" * tag 'pm-5.12-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: cpufreq: Fix typo in kerneldoc comment cpufreq: schedutil: Remove update_lock comment from struct sugov_policy definition cpufreq: schedutil: Remove needless sg_policy parameter from ignore_dl_rate_limit() cpufreq: ACPI: Set cpuinfo.max_freq directly if max boost is known cpufreq: qcom-hw: drop devm_xxx() calls from init/exit hooks opp: Don't skip freq update for different frequency
005d3bd9 · Linus Torvalds · e0fbd25b · 08c2a406 · 005d3bd9 · 005d3bd9
Commit 005d3bd9 authored Feb 23, 2021 by Linus Torvalds
7 changed files
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -54,7 +54,6 @@ struct acpi_cpufreq_data {
 	unsigned int resume;
 	unsigned int cpu_feature;
 	unsigned int acpi_perf_cpu;
-	unsigned int first_perf_state;
 	cpumask_var_t freqdomain_cpus;
 	void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
 	u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
@@ -223,10 +222,10 @@ static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr)

 	perf = to_perf_data(data);

-	cpufreq_for_each_entry(pos, policy->freq_table + data->first_perf_state)
+	cpufreq_for_each_entry(pos, policy->freq_table)
 		if (msr == perf->states[pos->driver_data].status)
 			return pos->frequency;
-	return policy->freq_table[data->first_perf_state].frequency;
+	return policy->freq_table[0].frequency;
 }

 static unsigned extract_freq(struct cpufreq_policy *policy, u32 val)
@@ -365,7 +364,6 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 	struct cpufreq_policy *policy;
 	unsigned int freq;
 	unsigned int cached_freq;
-	unsigned int state;

 	pr_debug("%s (%d)\n", __func__, cpu);

@@ -377,11 +375,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 	if (unlikely(!data || !policy->freq_table))
 		return 0;

-	state = to_perf_data(data)->state;
-	if (state < data->first_perf_state)
-		state = data->first_perf_state;
-
-	cached_freq = policy->freq_table[state].frequency;
+	cached_freq = policy->freq_table[to_perf_data(data)->state].frequency;
 	freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data));
 	if (freq != cached_freq) {
 		/*
@@ -680,7 +674,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	unsigned int valid_states = 0;
 	unsigned int result = 0;
-	unsigned int state_count;
 	u64 max_boost_ratio;
 	unsigned int i;
 #ifdef CONFIG_SMP
@@ -795,28 +788,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		goto err_unreg;
 	}

-	state_count = perf->state_count + 1;
-
-	max_boost_ratio = get_max_boost_ratio(cpu);
-	if (max_boost_ratio) {
-		/*
-		 * Make a room for one more entry to represent the highest
-		 * available "boost" frequency.
-		 */
-		state_count++;
-		valid_states++;
-		data->first_perf_state = valid_states;
-	} else {
-		/*
-		 * If the maximum "boost" frequency is unknown, ask the arch
-		 * scale-invariance code to use the "nominal" performance for
-		 * CPU utilization scaling so as to prevent the schedutil
-		 * governor from selecting inadequate CPU frequencies.
-		 */
-		arch_set_max_freq_ratio(true);
-	}
-
-	freq_table = kcalloc(state_count, sizeof(*freq_table), GFP_KERNEL);
+	freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table),
+			     GFP_KERNEL);
 	if (!freq_table) {
 		result = -ENOMEM;
 		goto err_unreg;
@@ -851,27 +824,25 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	}
 	freq_table[valid_states].frequency = CPUFREQ_TABLE_END;

+	max_boost_ratio = get_max_boost_ratio(cpu);
 	if (max_boost_ratio) {
-		unsigned int state = data->first_perf_state;
-		unsigned int freq = freq_table[state].frequency;
+		unsigned int freq = freq_table[0].frequency;

 		/*
 		 * Because the loop above sorts the freq_table entries in the
 		 * descending order, freq is the maximum frequency in the table.
 		 * Assume that it corresponds to the CPPC nominal frequency and
-		 * use it to populate the frequency field of the extra "boost"
-		 * frequency entry.
+		 * use it to set cpuinfo.max_freq.
 		 */
-		freq_table[0].frequency = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT;
+		policy->cpuinfo.max_freq = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT;
+	} else {
 		/*
-		 * The purpose of the extra "boost" frequency entry is to make
-		 * the rest of cpufreq aware of the real maximum frequency, but
-		 * the way to request it is the same as for the first_perf_state
-		 * entry that is expected to cover the entire range of "boost"
-		 * frequencies of the CPU, so copy the driver_data value from
-		 * that entry.
+		 * If the maximum "boost" frequency is unknown, ask the arch
+		 * scale-invariance code to use the "nominal" performance for
+		 * CPU utilization scaling so as to prevent the schedutil
+		 * governor from selecting inadequate CPU frequencies.
 		 */
-		freq_table[0].driver_data = freq_table[state].driver_data;
+		arch_set_max_freq_ratio(true);
 	}

 	policy->freq_table = freq_table;
@@ -947,8 +918,7 @@ static void acpi_cpufreq_cpu_ready(struct cpufreq_policy *policy)
 {
 	struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data,
 							      policy->cpu);
-	struct acpi_cpufreq_data *data = policy->driver_data;
-	unsigned int freq = policy->freq_table[data->first_perf_state].frequency;
+	unsigned int freq = policy->freq_table[0].frequency;

 	if (perf->states[0].core_frequency * 1000 != freq)
 		pr_warn(FW_WARN "P-state 0 is not max freq\n");

--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2101,7 +2101,7 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
 * cpufreq_driver_adjust_perf - Adjust CPU performance level in one go.
 * @cpu: Target CPU.
 * @min_perf: Minimum (required) performance level (units of @capacity).
- * @target_perf: Terget (desired) performance level (units of @capacity).
+ * @target_perf: Target (desired) performance level (units of @capacity).
 * @capacity: Capacity of the target CPU.
 *
 * Carry out a fast performance level switch of @cpu without sleeping.

--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -52,6 +52,12 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
 	}

 	policy->min = policy->cpuinfo.min_freq = min_freq;
+	policy->max = max_freq;
+	/*
+	 * If the driver has set its own cpuinfo.max_freq above max_freq, leave
+	 * it as is.
+	 */
+	if (policy->cpuinfo.max_freq < max_freq)
 		policy->max = policy->cpuinfo.max_freq = max_freq;

 	if (policy->min == ~0)

--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -32,6 +32,7 @@ struct qcom_cpufreq_soc_data {

 struct qcom_cpufreq_data {
 	void __iomem *base;
+	struct resource *res;
 	const struct qcom_cpufreq_soc_data *soc_data;
 };

@@ -280,6 +281,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
 	struct of_phandle_args args;
 	struct device_node *cpu_np;
 	struct device *cpu_dev;
+	struct resource *res;
 	void __iomem *base;
 	struct qcom_cpufreq_data *data;
 	int ret, index;
@@ -303,18 +305,33 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)

 	index = args.args[0];

-	base = devm_platform_ioremap_resource(pdev, index);
-	if (IS_ERR(base))
-		return PTR_ERR(base);
+	res = platform_get_resource(pdev, IORESOURCE_MEM, index);
+	if (!res) {
+		dev_err(dev, "failed to get mem resource %d\n", index);
+		return -ENODEV;
+	}
+
+	if (!request_mem_region(res->start, resource_size(res), res->name)) {
+		dev_err(dev, "failed to request resource %pR\n", res);
+		return -EBUSY;
+	}

-	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+	base = ioremap(res->start, resource_size(res));
+	if (IS_ERR(base)) {
+		dev_err(dev, "failed to map resource %pR\n", res);
+		ret = PTR_ERR(base);
+		goto release_region;
+	}
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data) {
 		ret = -ENOMEM;
-		goto error;
+		goto unmap_base;
 	}

 	data->soc_data = of_device_get_match_data(&pdev->dev);
 	data->base = base;
+	data->res = res;

 	/* HW should be in enabled state to proceed */
 	if (!(readl_relaxed(base + data->soc_data->reg_enable) & 0x1)) {
@@ -355,7 +372,11 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)

 	return 0;
 error:
-	devm_iounmap(dev, base);
+	kfree(data);
+unmap_base:
+	iounmap(data->base);
+release_region:
+	release_mem_region(res->start, resource_size(res));
 	return ret;
 }

@@ -363,12 +384,15 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
 {
 	struct device *cpu_dev = get_cpu_device(policy->cpu);
 	struct qcom_cpufreq_data *data = policy->driver_data;
-	struct platform_device *pdev = cpufreq_get_driver_data();
+	struct resource *res = data->res;
+	void __iomem *base = data->base;

 	dev_pm_opp_remove_all_dynamic(cpu_dev);
 	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
 	kfree(policy->freq_table);
-	devm_iounmap(&pdev->dev, data->base);
+	kfree(data);
+	iounmap(base);
+	release_mem_region(res->start, resource_size(res));

 	return 0;
 }

--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -998,14 +998,15 @@ static int _set_opp(struct device *dev, struct opp_table *opp_table,
 	old_opp = opp_table->current_opp;

 	/* Return early if nothing to do */
-	if (opp_table->enabled && old_opp == opp) {
+	if (old_opp == opp && opp_table->current_rate == freq &&
+	    opp_table->enabled) {
 		dev_dbg(dev, "%s: OPPs are same, nothing to do\n", __func__);
 		return 0;
 	}

 	dev_dbg(dev, "%s: switching OPP: Freq %lu -> %lu Hz, Level %u -> %u, Bw %u -> %u\n",
-		__func__, old_opp->rate, freq, old_opp->level, opp->level,
-		old_opp->bandwidth ? old_opp->bandwidth[0].peak : 0,
+		__func__, opp_table->current_rate, freq, old_opp->level,
+		opp->level, old_opp->bandwidth ? old_opp->bandwidth[0].peak : 0,
 		opp->bandwidth ? opp->bandwidth[0].peak : 0);

 	scaling_down = _opp_compare_key(old_opp, opp);
@@ -1061,6 +1062,7 @@ static int _set_opp(struct device *dev, struct opp_table *opp_table,
 	/* Make sure current_opp doesn't get freed */
 	dev_pm_opp_get(opp);
 	opp_table->current_opp = opp;
+	opp_table->current_rate = freq;

 	return ret;
 }

--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@@ -135,6 +135,7 @@ enum opp_table_access {
 * @clock_latency_ns_max: Max clock latency in nanoseconds.
 * @parsed_static_opps: Count of devices for which OPPs are initialized from DT.
 * @shared_opp: OPP is shared between multiple devices.
+ * @current_rate: Currently configured frequency.
 * @current_opp: Currently configured OPP for the table.
 * @suspend_opp: Pointer to OPP to be used during device suspend.
 * @genpd_virt_dev_lock: Mutex protecting the genpd virtual device pointers.
@@ -184,6 +185,7 @@ struct opp_table {

 	unsigned int parsed_static_opps;
 	enum opp_table_access shared_opp;
+	unsigned long current_rate;
 	struct dev_pm_opp *current_opp;
 	struct dev_pm_opp *suspend_opp;


--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -26,7 +26,7 @@ struct sugov_policy {
 	struct sugov_tunables	*tunables;
 	struct list_head	tunables_hook;

-	raw_spinlock_t		update_lock;	/* For shared policies */
+	raw_spinlock_t		update_lock;
 	u64			last_freq_update_time;
 	s64			freq_update_delay_ns;
 	unsigned int		next_freq;
@@ -320,23 +320,21 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
 * Make sugov_should_update_freq() ignore the rate limit when DL
 * has increased the utilization.
 */
-static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy)
+static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
 {
 	if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl)
-		sg_policy->limits_changed = true;
+		sg_cpu->sg_policy->limits_changed = true;
 }

 static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
 					      u64 time, unsigned int flags)
 {
-	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
-
 	sugov_iowait_boost(sg_cpu, time, flags);
 	sg_cpu->last_update = time;

-	ignore_dl_rate_limit(sg_cpu, sg_policy);
+	ignore_dl_rate_limit(sg_cpu);

-	if (!sugov_should_update_freq(sg_policy, time))
+	if (!sugov_should_update_freq(sg_cpu->sg_policy, time))
 		return false;

 	sugov_get_util(sg_cpu);
@@ -451,7 +449,7 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
 	sugov_iowait_boost(sg_cpu, time, flags);
 	sg_cpu->last_update = time;

-	ignore_dl_rate_limit(sg_cpu, sg_policy);
+	ignore_dl_rate_limit(sg_cpu);

 	if (sugov_should_update_freq(sg_policy, time)) {
 		next_f = sugov_next_freq_shared(sg_cpu, time);