Commit 4157c2fc authored by Rafael J. Wysocki's avatar Rafael J. Wysocki

Merge back earlier cpufreq material for v4.5.

parents 88b7b7c0 b122bcd9
......@@ -159,8 +159,8 @@ to be strictly associated with a P-state.
2.2 cpuinfo_transition_latency:
-------------------------------
The cpuinfo_transition_latency field is 0. The PCC specification does
not include a field to expose this value currently.
The cpuinfo_transition_latency field is CPUFREQ_ETERNAL. The PCC specification
does not include a field to expose this value currently.
2.3 cpuinfo_cur_freq:
---------------------
......
......@@ -242,6 +242,23 @@ nodes to be present and contain the properties described below.
Definition: Specifies the syscon node controlling the cpu core
power domains.
- dynamic-power-coefficient
Usage: optional
Value type: <prop-encoded-array>
Definition: A u32 value that represents the running time dynamic
power coefficient in units of mW/MHz/uVolt^2. The
coefficient can either be calculated from power
measurements or derived by analysis.
The dynamic power consumption of the CPU is
proportional to the square of the Voltage (V) and
the clock frequency (f). The coefficient is used to
calculate the dynamic power as below -
Pdyn = dynamic-power-coefficient * V^2 * f
where voltage is in uV, frequency is in MHz.
Example 1 (dual-cluster big.LITTLE system 32-bit):
cpus {
......
Binding for ST's CPUFreq driver
===============================
ST's CPUFreq driver attempts to read 'process' and 'version' attributes
from the SoC, then supplies the OPP framework with 'prop' and 'supported
hardware' information respectively. The framework is then able to read
the DT and operate in the usual way.
For more information about the expected DT format [See: ../opp/opp.txt].
Frequency Scaling only
----------------------
No vendor specific driver required for this.
Located in CPU's node:
- operating-points : [See: ../power/opp.txt]
Example [safe]
--------------
cpus {
cpu@0 {
/* kHz uV */
operating-points = <1500000 0
1200000 0
800000 0
500000 0>;
};
};
Dynamic Voltage and Frequency Scaling (DVFS)
--------------------------------------------
This requires the ST CPUFreq driver to supply 'process' and 'version' info.
Located in CPU's node:
- operating-points-v2 : [See ../power/opp.txt]
Example [unsafe]
----------------
cpus {
cpu@0 {
operating-points-v2 = <&cpu0_opp_table>;
};
};
cpu0_opp_table: opp_table {
compatible = "operating-points-v2";
/* ############################################################### */
/* # WARNING: Do not attempt to copy/replicate these nodes, # */
/* # they are only to be supplied by the bootloader !!! # */
/* ############################################################### */
opp0 {
/* Major Minor Substrate */
/* 2 all all */
opp-supported-hw = <0x00000004 0xffffffff 0xffffffff>;
opp-hz = /bits/ 64 <1500000000>;
clock-latency-ns = <10000000>;
opp-microvolt-pcode0 = <1200000>;
opp-microvolt-pcode1 = <1200000>;
opp-microvolt-pcode2 = <1200000>;
opp-microvolt-pcode3 = <1200000>;
opp-microvolt-pcode4 = <1170000>;
opp-microvolt-pcode5 = <1140000>;
opp-microvolt-pcode6 = <1100000>;
opp-microvolt-pcode7 = <1070000>;
};
opp1 {
/* Major Minor Substrate */
/* all all all */
opp-supported-hw = <0xffffffff 0xffffffff 0xffffffff>;
opp-hz = /bits/ 64 <1200000000>;
clock-latency-ns = <10000000>;
opp-microvolt-pcode0 = <1110000>;
opp-microvolt-pcode1 = <1150000>;
opp-microvolt-pcode2 = <1100000>;
opp-microvolt-pcode3 = <1080000>;
opp-microvolt-pcode4 = <1040000>;
opp-microvolt-pcode5 = <1020000>;
opp-microvolt-pcode6 = <980000>;
opp-microvolt-pcode7 = <930000>;
};
};
......@@ -45,21 +45,10 @@ Devices supporting OPPs must set their "operating-points-v2" property with
phandle to a OPP table in their DT node. The OPP core will use this phandle to
find the operating points for the device.
Devices may want to choose OPP tables at runtime and so can provide a list of
phandles here. But only *one* of them should be chosen at runtime. This must be
accompanied by a corresponding "operating-points-names" property, to uniquely
identify the OPP tables.
If required, this can be extended for SoC vendor specfic bindings. Such bindings
should be documented as Documentation/devicetree/bindings/power/<vendor>-opp.txt
and should have a compatible description like: "operating-points-v2-<vendor>".
Optional properties:
- operating-points-names: Names of OPP tables (required if multiple OPP
tables are present), to uniquely identify them. The same list must be present
for all the CPUs which are sharing clock/voltage rails and hence the OPP
tables.
* OPP Table Node
This describes the OPPs belonging to a device. This node can have following
......@@ -100,6 +89,14 @@ Optional properties:
Entries for multiple regulators must be present in the same order as
regulators are specified in device's DT node.
- opp-microvolt-<name>: Named opp-microvolt property. This is exactly similar to
the above opp-microvolt property, but allows multiple voltage ranges to be
provided for the same OPP. At runtime, the platform can pick a <name> and
matching opp-microvolt-<name> property will be enabled for all OPPs. If the
platform doesn't pick a specific <name> or the <name> doesn't match with any
opp-microvolt-<name> properties, then opp-microvolt property shall be used, if
present.
- opp-microamp: The maximum current drawn by the device in microamperes
considering system specific parameters (such as transients, process, aging,
maximum operating temperature range etc.) as necessary. This may be used to
......@@ -112,6 +109,9 @@ Optional properties:
for few regulators, then this should be marked as zero for them. If it isn't
required for any regulator, then this property need not be present.
- opp-microamp-<name>: Named opp-microamp property. Similar to
opp-microvolt-<name> property, but for microamp instead.
- clock-latency-ns: Specifies the maximum possible transition latency (in
nanoseconds) for switching to this OPP from any other OPP.
......@@ -123,6 +123,26 @@ Optional properties:
- opp-suspend: Marks the OPP to be used during device suspend. Only one OPP in
the table should have this.
- opp-supported-hw: This enables us to select only a subset of OPPs from the
larger OPP table, based on what version of the hardware we are running on. We
still can't have multiple nodes with the same opp-hz value in OPP table.
It's an user defined array containing a hierarchy of hardware version numbers,
supported by the OPP. For example: a platform with hierarchy of three levels
of versions (A, B and C), this field should be like <X Y Z>, where X
corresponds to Version hierarchy A, Y corresponds to version hierarchy B and Z
corresponds to version hierarchy C.
Each level of hierarchy is represented by a 32 bit value, and so there can be
only 32 different supported version per hierarchy. i.e. 1 bit per version. A
value of 0xFFFFFFFF will enable the OPP for all versions for that hierarchy
level. And a value of 0x00000000 will disable the OPP completely, and so we
never want that to happen.
If 32 values aren't sufficient for a version hierarchy, than that version
hierarchy can be contained in multiple 32 bit values. i.e. <X Y Z1 Z2> in the
above example, Z1 & Z2 refer to the version hierarchy Z.
- status: Marks the node enabled/disabled.
Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together.
......@@ -157,20 +177,20 @@ Example 1: Single cluster Dual-core ARM cortex A9, switch DVFS states together.
compatible = "operating-points-v2";
opp-shared;
opp00 {
opp@1000000000 {
opp-hz = /bits/ 64 <1000000000>;
opp-microvolt = <970000 975000 985000>;
opp-microamp = <70000>;
clock-latency-ns = <300000>;
opp-suspend;
};
opp01 {
opp@1100000000 {
opp-hz = /bits/ 64 <1100000000>;
opp-microvolt = <980000 1000000 1010000>;
opp-microamp = <80000>;
clock-latency-ns = <310000>;
};
opp02 {
opp@1200000000 {
opp-hz = /bits/ 64 <1200000000>;
opp-microvolt = <1025000>;
clock-latency-ns = <290000>;
......@@ -236,20 +256,20 @@ independently.
* independently.
*/
opp00 {
opp@1000000000 {
opp-hz = /bits/ 64 <1000000000>;
opp-microvolt = <970000 975000 985000>;
opp-microamp = <70000>;
clock-latency-ns = <300000>;
opp-suspend;
};
opp01 {
opp@1100000000 {
opp-hz = /bits/ 64 <1100000000>;
opp-microvolt = <980000 1000000 1010000>;
opp-microamp = <80000>;
clock-latency-ns = <310000>;
};
opp02 {
opp@1200000000 {
opp-hz = /bits/ 64 <1200000000>;
opp-microvolt = <1025000>;
opp-microamp = <90000;
......@@ -312,20 +332,20 @@ DVFS state together.
compatible = "operating-points-v2";
opp-shared;
opp00 {
opp@1000000000 {
opp-hz = /bits/ 64 <1000000000>;
opp-microvolt = <970000 975000 985000>;
opp-microamp = <70000>;
clock-latency-ns = <300000>;
opp-suspend;
};
opp01 {
opp@1100000000 {
opp-hz = /bits/ 64 <1100000000>;
opp-microvolt = <980000 1000000 1010000>;
opp-microamp = <80000>;
clock-latency-ns = <310000>;
};
opp02 {
opp@1200000000 {
opp-hz = /bits/ 64 <1200000000>;
opp-microvolt = <1025000>;
opp-microamp = <90000>;
......@@ -338,20 +358,20 @@ DVFS state together.
compatible = "operating-points-v2";
opp-shared;
opp10 {
opp@1300000000 {
opp-hz = /bits/ 64 <1300000000>;
opp-microvolt = <1045000 1050000 1055000>;
opp-microamp = <95000>;
clock-latency-ns = <400000>;
opp-suspend;
};
opp11 {
opp@1400000000 {
opp-hz = /bits/ 64 <1400000000>;
opp-microvolt = <1075000>;
opp-microamp = <100000>;
clock-latency-ns = <400000>;
};
opp12 {
opp@1500000000 {
opp-hz = /bits/ 64 <1500000000>;
opp-microvolt = <1010000 1100000 1110000>;
opp-microamp = <95000>;
......@@ -378,7 +398,7 @@ Example 4: Handling multiple regulators
compatible = "operating-points-v2";
opp-shared;
opp00 {
opp@1000000000 {
opp-hz = /bits/ 64 <1000000000>;
opp-microvolt = <970000>, /* Supply 0 */
<960000>, /* Supply 1 */
......@@ -391,7 +411,7 @@ Example 4: Handling multiple regulators
/* OR */
opp00 {
opp@1000000000 {
opp-hz = /bits/ 64 <1000000000>;
opp-microvolt = <970000 975000 985000>, /* Supply 0 */
<960000 965000 975000>, /* Supply 1 */
......@@ -404,7 +424,7 @@ Example 4: Handling multiple regulators
/* OR */
opp00 {
opp@1000000000 {
opp-hz = /bits/ 64 <1000000000>;
opp-microvolt = <970000 975000 985000>, /* Supply 0 */
<960000 965000 975000>, /* Supply 1 */
......@@ -417,7 +437,8 @@ Example 4: Handling multiple regulators
};
};
Example 5: Multiple OPP tables
Example 5: opp-supported-hw
(example: three level hierarchy of versions: cuts, substrate and process)
/ {
cpus {
......@@ -426,40 +447,73 @@ Example 5: Multiple OPP tables
...
cpu-supply = <&cpu_supply>
operating-points-v2 = <&cpu0_opp_table_slow>, <&cpu0_opp_table_fast>;
operating-points-names = "slow", "fast";
operating-points-v2 = <&cpu0_opp_table_slow>;
};
};
cpu0_opp_table_slow: opp_table_slow {
opp_table {
compatible = "operating-points-v2";
status = "okay";
opp-shared;
opp00 {
opp@600000000 {
/*
* Supports all substrate and process versions for 0xF
* cuts, i.e. only first four cuts.
*/
opp-supported-hw = <0xF 0xFFFFFFFF 0xFFFFFFFF>
opp-hz = /bits/ 64 <600000000>;
opp-microvolt = <900000 915000 925000>;
...
};
opp01 {
opp@800000000 {
/*
* Supports:
* - cuts: only one, 6th cut (represented by 6th bit).
* - substrate: supports 16 different substrate versions
* - process: supports 9 different process versions
*/
opp-supported-hw = <0x20 0xff0000ff 0x0000f4f0>
opp-hz = /bits/ 64 <800000000>;
opp-microvolt = <900000 915000 925000>;
...
};
};
};
Example 6: opp-microvolt-<name>, opp-microamp-<name>:
(example: device with two possible microvolt ranges: slow and fast)
cpu0_opp_table_fast: opp_table_fast {
/ {
cpus {
cpu@0 {
compatible = "arm,cortex-a7";
...
operating-points-v2 = <&cpu0_opp_table>;
};
};
cpu0_opp_table: opp_table0 {
compatible = "operating-points-v2";
status = "okay";
opp-shared;
opp10 {
opp@1000000000 {
opp-hz = /bits/ 64 <1000000000>;
...
opp-microvolt-slow = <900000 915000 925000>;
opp-microvolt-fast = <970000 975000 985000>;
opp-microamp-slow = <70000>;
opp-microamp-fast = <71000>;
};
opp11 {
opp-hz = /bits/ 64 <1100000000>;
...
opp@1200000000 {
opp-hz = /bits/ 64 <1200000000>;
opp-microvolt-slow = <900000 915000 925000>, /* Supply vcc0 */
<910000 925000 935000>; /* Supply vcc1 */
opp-microvolt-fast = <970000 975000 985000>, /* Supply vcc0 */
<960000 965000 975000>; /* Supply vcc1 */
opp-microamp = <70000>; /* Will be used for both slow/fast */
};
};
};
......@@ -64,73 +64,73 @@ cpu0_opp_table: opp_table0 {
compatible = "operating-points-v2";
opp-shared;
opp00 {
opp@200000000 {
opp-hz = /bits/ 64 <200000000>;
opp-microvolt = <900000>;
clock-latency-ns = <200000>;
};
opp01 {
opp@300000000 {
opp-hz = /bits/ 64 <300000000>;
opp-microvolt = <900000>;
clock-latency-ns = <200000>;
};
opp02 {
opp@400000000 {
opp-hz = /bits/ 64 <400000000>;
opp-microvolt = <925000>;
clock-latency-ns = <200000>;
};
opp03 {
opp@500000000 {
opp-hz = /bits/ 64 <500000000>;
opp-microvolt = <950000>;
clock-latency-ns = <200000>;
};
opp04 {
opp@600000000 {
opp-hz = /bits/ 64 <600000000>;
opp-microvolt = <975000>;
clock-latency-ns = <200000>;
};
opp05 {
opp@700000000 {
opp-hz = /bits/ 64 <700000000>;
opp-microvolt = <987500>;
clock-latency-ns = <200000>;
};
opp06 {
opp@800000000 {
opp-hz = /bits/ 64 <800000000>;
opp-microvolt = <1000000>;
clock-latency-ns = <200000>;
opp-suspend;
};
opp07 {
opp@900000000 {
opp-hz = /bits/ 64 <900000000>;
opp-microvolt = <1037500>;
clock-latency-ns = <200000>;
};
opp08 {
opp@1000000000 {
opp-hz = /bits/ 64 <1000000000>;
opp-microvolt = <1087500>;
clock-latency-ns = <200000>;
};
opp09 {
opp@1100000000 {
opp-hz = /bits/ 64 <1100000000>;
opp-microvolt = <1137500>;
clock-latency-ns = <200000>;
};
opp10 {
opp@1200000000 {
opp-hz = /bits/ 64 <1200000000>;
opp-microvolt = <1187500>;
clock-latency-ns = <200000>;
};
opp11 {
opp@1300000000 {
opp-hz = /bits/ 64 <1300000000>;
opp-microvolt = <1250000>;
clock-latency-ns = <200000>;
};
opp12 {
opp@1400000000 {
opp-hz = /bits/ 64 <1400000000>;
opp-microvolt = <1287500>;
clock-latency-ns = <200000>;
};
opp13 {
opp@1500000000 {
opp-hz = /bits/ 64 <1500000000>;
opp-microvolt = <1350000>;
clock-latency-ns = <200000>;
......
ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG
obj-y += core.o cpu.o
obj-$(CONFIG_DEBUG_FS) += debugfs.o
This diff is collapsed.
/*
* Generic OPP debugfs interface
*
* Copyright (C) 2015-2016 Viresh Kumar <viresh.kumar@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/limits.h>
#include "opp.h"
static struct dentry *rootdir;
static void opp_set_dev_name(const struct device *dev, char *name)
{
if (dev->parent)
snprintf(name, NAME_MAX, "%s-%s", dev_name(dev->parent),
dev_name(dev));
else
snprintf(name, NAME_MAX, "%s", dev_name(dev));
}
void opp_debug_remove_one(struct dev_pm_opp *opp)
{
debugfs_remove_recursive(opp->dentry);
}
int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp)
{
struct dentry *pdentry = dev_opp->dentry;
struct dentry *d;
char name[25]; /* 20 chars for 64 bit value + 5 (opp:\0) */
/* Rate is unique to each OPP, use it to give opp-name */
snprintf(name, sizeof(name), "opp:%lu", opp->rate);
/* Create per-opp directory */
d = debugfs_create_dir(name, pdentry);
if (!d)
return -ENOMEM;
if (!debugfs_create_bool("available", S_IRUGO, d, &opp->available))
return -ENOMEM;
if (!debugfs_create_bool("dynamic", S_IRUGO, d, &opp->dynamic))
return -ENOMEM;
if (!debugfs_create_bool("turbo", S_IRUGO, d, &opp->turbo))
return -ENOMEM;
if (!debugfs_create_bool("suspend", S_IRUGO, d, &opp->suspend))
return -ENOMEM;
if (!debugfs_create_ulong("rate_hz", S_IRUGO, d, &opp->rate))
return -ENOMEM;
if (!debugfs_create_ulong("u_volt_target", S_IRUGO, d, &opp->u_volt))
return -ENOMEM;
if (!debugfs_create_ulong("u_volt_min", S_IRUGO, d, &opp->u_volt_min))
return -ENOMEM;
if (!debugfs_create_ulong("u_volt_max", S_IRUGO, d, &opp->u_volt_max))
return -ENOMEM;
if (!debugfs_create_ulong("u_amp", S_IRUGO, d, &opp->u_amp))
return -ENOMEM;
if (!debugfs_create_ulong("clock_latency_ns", S_IRUGO, d,
&opp->clock_latency_ns))
return -ENOMEM;
opp->dentry = d;
return 0;
}
static int device_opp_debug_create_dir(struct device_list_opp *list_dev,
struct device_opp *dev_opp)
{
const struct device *dev = list_dev->dev;
struct dentry *d;
opp_set_dev_name(dev, dev_opp->dentry_name);
/* Create device specific directory */
d = debugfs_create_dir(dev_opp->dentry_name, rootdir);
if (!d) {
dev_err(dev, "%s: Failed to create debugfs dir\n", __func__);
return -ENOMEM;
}
list_dev->dentry = d;
dev_opp->dentry = d;
return 0;
}
static int device_opp_debug_create_link(struct device_list_opp *list_dev,
struct device_opp *dev_opp)
{
const struct device *dev = list_dev->dev;
char name[NAME_MAX];
struct dentry *d;
opp_set_dev_name(list_dev->dev, name);
/* Create device specific directory link */
d = debugfs_create_symlink(name, rootdir, dev_opp->dentry_name);
if (!d) {
dev_err(dev, "%s: Failed to create link\n", __func__);
return -ENOMEM;
}
list_dev->dentry = d;
return 0;
}
/**
* opp_debug_register - add a device opp node to the debugfs 'opp' directory
* @list_dev: list-dev pointer for device
* @dev_opp: the device-opp being added
*
* Dynamically adds device specific directory in debugfs 'opp' directory. If the
* device-opp is shared with other devices, then links will be created for all
* devices except the first.
*
* Return: 0 on success, otherwise negative error.
*/
int opp_debug_register(struct device_list_opp *list_dev,
struct device_opp *dev_opp)
{
if (!rootdir) {
pr_debug("%s: Uninitialized rootdir\n", __func__);
return -EINVAL;
}
if (dev_opp->dentry)
return device_opp_debug_create_link(list_dev, dev_opp);
return device_opp_debug_create_dir(list_dev, dev_opp);
}
static void opp_migrate_dentry(struct device_list_opp *list_dev,
struct device_opp *dev_opp)
{
struct device_list_opp *new_dev;
const struct device *dev;
struct dentry *dentry;
/* Look for next list-dev */
list_for_each_entry(new_dev, &dev_opp->dev_list, node)
if (new_dev != list_dev)
break;
/* new_dev is guaranteed to be valid here */
dev = new_dev->dev;
debugfs_remove_recursive(new_dev->dentry);
opp_set_dev_name(dev, dev_opp->dentry_name);
dentry = debugfs_rename(rootdir, list_dev->dentry, rootdir,
dev_opp->dentry_name);
if (!dentry) {
dev_err(dev, "%s: Failed to rename link from: %s to %s\n",
__func__, dev_name(list_dev->dev), dev_name(dev));
return;
}
new_dev->dentry = dentry;
dev_opp->dentry = dentry;
}
/**
* opp_debug_unregister - remove a device opp node from debugfs opp directory
* @list_dev: list-dev pointer for device
* @dev_opp: the device-opp being removed
*
* Dynamically removes device specific directory from debugfs 'opp' directory.
*/
void opp_debug_unregister(struct device_list_opp *list_dev,
struct device_opp *dev_opp)
{
if (list_dev->dentry == dev_opp->dentry) {
/* Move the real dentry object under another device */
if (!list_is_singular(&dev_opp->dev_list)) {
opp_migrate_dentry(list_dev, dev_opp);
goto out;
}
dev_opp->dentry = NULL;
}
debugfs_remove_recursive(list_dev->dentry);
out:
list_dev->dentry = NULL;
}
static int __init opp_debug_init(void)
{
/* Create /sys/kernel/debug/opp directory */
rootdir = debugfs_create_dir("opp", NULL);
if (!rootdir) {
pr_err("%s: Failed to create root directory\n", __func__);
return -ENOMEM;
}
return 0;
}
core_initcall(opp_debug_init);
......@@ -17,6 +17,7 @@
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/limits.h>
#include <linux/pm_opp.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
......@@ -50,9 +51,10 @@ extern struct mutex dev_opp_list_lock;
* are protected by the dev_opp_list_lock for integrity.
* IMPORTANT: the opp nodes should be maintained in increasing
* order.
* @dynamic: not-created from static DT entries.
* @available: true/false - marks if this OPP as available or not
* @dynamic: not-created from static DT entries.
* @turbo: true if turbo (boost) OPP
* @suspend: true if suspend OPP
* @rate: Frequency in hertz
* @u_volt: Target voltage in microvolts corresponding to this OPP
* @u_volt_min: Minimum voltage in microvolts corresponding to this OPP
......@@ -63,6 +65,7 @@ extern struct mutex dev_opp_list_lock;
* @dev_opp: points back to the device_opp struct this opp belongs to
* @rcu_head: RCU callback head used for deferred freeing
* @np: OPP's device node.
* @dentry: debugfs dentry pointer (per opp)
*
* This structure stores the OPP information for a given device.
*/
......@@ -72,6 +75,7 @@ struct dev_pm_opp {
bool available;
bool dynamic;
bool turbo;
bool suspend;
unsigned long rate;
unsigned long u_volt;
......@@ -84,6 +88,10 @@ struct dev_pm_opp {
struct rcu_head rcu_head;
struct device_node *np;
#ifdef CONFIG_DEBUG_FS
struct dentry *dentry;
#endif
};
/**
......@@ -91,6 +99,7 @@ struct dev_pm_opp {
* @node: list node
* @dev: device to which the struct object belongs
* @rcu_head: RCU callback head used for deferred freeing
* @dentry: debugfs dentry pointer (per device)
*
* This is an internal data structure maintaining the list of devices that are
* managed by 'struct device_opp'.
......@@ -99,6 +108,10 @@ struct device_list_opp {
struct list_head node;
const struct device *dev;
struct rcu_head rcu_head;
#ifdef CONFIG_DEBUG_FS
struct dentry *dentry;
#endif
};
/**
......@@ -113,7 +126,14 @@ struct device_list_opp {
* @dev_list: list of devices that share these OPPs
* @opp_list: list of opps
* @np: struct device_node pointer for opp's DT node.
* @clock_latency_ns_max: Max clock latency in nanoseconds.
* @shared_opp: OPP is shared between multiple devices.
* @suspend_opp: Pointer to OPP to be used during device suspend.
* @supported_hw: Array of version number to support.
* @supported_hw_count: Number of elements in supported_hw array.
* @prop_name: A name to postfix to many DT properties, while parsing them.
* @dentry: debugfs dentry pointer of the real device directory (not links).
* @dentry_name: Name of the real dentry.
*
* This is an internal data structure maintaining the link to opps attached to
* a device. This structure is not meant to be shared to users as it is
......@@ -135,6 +155,15 @@ struct device_opp {
unsigned long clock_latency_ns_max;
bool shared_opp;
struct dev_pm_opp *suspend_opp;
unsigned int *supported_hw;
unsigned int supported_hw_count;
const char *prop_name;
#ifdef CONFIG_DEBUG_FS
struct dentry *dentry;
char dentry_name[NAME_MAX];
#endif
};
/* Routines internal to opp core */
......@@ -143,4 +172,26 @@ struct device_list_opp *_add_list_dev(const struct device *dev,
struct device_opp *dev_opp);
struct device_node *_of_get_opp_desc_node(struct device *dev);
#ifdef CONFIG_DEBUG_FS
void opp_debug_remove_one(struct dev_pm_opp *opp);
int opp_debug_create_one(struct dev_pm_opp *opp, struct device_opp *dev_opp);
int opp_debug_register(struct device_list_opp *list_dev,
struct device_opp *dev_opp);
void opp_debug_unregister(struct device_list_opp *list_dev,
struct device_opp *dev_opp);
#else
static inline void opp_debug_remove_one(struct dev_pm_opp *opp) {}
static inline int opp_debug_create_one(struct dev_pm_opp *opp,
struct device_opp *dev_opp)
{ return 0; }
static inline int opp_debug_register(struct device_list_opp *list_dev,
struct device_opp *dev_opp)
{ return 0; }
static inline void opp_debug_unregister(struct device_list_opp *list_dev,
struct device_opp *dev_opp)
{ }
#endif /* DEBUG_FS */
#endif /* __DRIVER_OPP_H__ */
......@@ -6,6 +6,8 @@
config ARM_BIG_LITTLE_CPUFREQ
tristate "Generic ARM big LITTLE CPUfreq driver"
depends on (ARM_CPU_TOPOLOGY || ARM64) && HAVE_CLK
# if CPU_THERMAL is on and THERMAL=m, ARM_BIT_LITTLE_CPUFREQ cannot be =y
depends on !CPU_THERMAL || THERMAL
select PM_OPP
help
This enables the Generic CPUfreq driver for ARM big.LITTLE platforms.
......@@ -217,6 +219,16 @@ config ARM_SPEAR_CPUFREQ
help
This adds the CPUFreq driver support for SPEAr SOCs.
config ARM_STI_CPUFREQ
tristate "STi CPUFreq support"
depends on SOC_STIH407
help
This driver uses the generic OPP framework to match the running
platform with a predefined set of suitable values. If not provided
we will fall-back so safe-values contained in Device Tree. Enable
this config option if you wish to add CPUFreq support for STi based
SoCs.
config ARM_TEGRA20_CPUFREQ
bool "Tegra20 CPUFreq support"
depends on ARCH_TEGRA
......
......@@ -73,6 +73,7 @@ obj-$(CONFIG_ARM_SA1100_CPUFREQ) += sa1100-cpufreq.o
obj-$(CONFIG_ARM_SA1110_CPUFREQ) += sa1110-cpufreq.o
obj-$(CONFIG_ARM_SCPI_CPUFREQ) += scpi-cpufreq.o
obj-$(CONFIG_ARM_SPEAR_CPUFREQ) += spear-cpufreq.o
obj-$(CONFIG_ARM_STI_CPUFREQ) += sti-cpufreq.o
obj-$(CONFIG_ARM_TEGRA20_CPUFREQ) += tegra20-cpufreq.o
obj-$(CONFIG_ARM_TEGRA124_CPUFREQ) += tegra124-cpufreq.o
obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ) += vexpress-spc-cpufreq.o
......
......@@ -23,6 +23,7 @@
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/cpu_cooling.h>
#include <linux/export.h>
#include <linux/module.h>
#include <linux/mutex.h>
......@@ -55,6 +56,7 @@ static bool bL_switching_enabled;
#define ACTUAL_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq << 1 : freq)
#define VIRT_FREQ(cluster, freq) ((cluster == A7_CLUSTER) ? freq >> 1 : freq)
static struct thermal_cooling_device *cdev[MAX_CLUSTERS];
static struct cpufreq_arm_bL_ops *arm_bL_ops;
static struct clk *clk[MAX_CLUSTERS];
static struct cpufreq_frequency_table *freq_table[MAX_CLUSTERS + 1];
......@@ -493,6 +495,12 @@ static int bL_cpufreq_init(struct cpufreq_policy *policy)
static int bL_cpufreq_exit(struct cpufreq_policy *policy)
{
struct device *cpu_dev;
int cur_cluster = cpu_to_cluster(policy->cpu);
if (cur_cluster < MAX_CLUSTERS) {
cpufreq_cooling_unregister(cdev[cur_cluster]);
cdev[cur_cluster] = NULL;
}
cpu_dev = get_cpu_device(policy->cpu);
if (!cpu_dev) {
......@@ -507,6 +515,38 @@ static int bL_cpufreq_exit(struct cpufreq_policy *policy)
return 0;
}
static void bL_cpufreq_ready(struct cpufreq_policy *policy)
{
struct device *cpu_dev = get_cpu_device(policy->cpu);
int cur_cluster = cpu_to_cluster(policy->cpu);
struct device_node *np;
/* Do not register a cpu_cooling device if we are in IKS mode */
if (cur_cluster >= MAX_CLUSTERS)
return;
np = of_node_get(cpu_dev->of_node);
if (WARN_ON(!np))
return;
if (of_find_property(np, "#cooling-cells", NULL)) {
u32 power_coefficient = 0;
of_property_read_u32(np, "dynamic-power-coefficient",
&power_coefficient);
cdev[cur_cluster] = of_cpufreq_power_cooling_register(np,
policy->related_cpus, power_coefficient, NULL);
if (IS_ERR(cdev[cur_cluster])) {
dev_err(cpu_dev,
"running cpufreq without cooling device: %ld\n",
PTR_ERR(cdev[cur_cluster]));
cdev[cur_cluster] = NULL;
}
}
of_node_put(np);
}
static struct cpufreq_driver bL_cpufreq_driver = {
.name = "arm-big-little",
.flags = CPUFREQ_STICKY |
......@@ -517,6 +557,7 @@ static struct cpufreq_driver bL_cpufreq_driver = {
.get = bL_cpufreq_get_rate,
.init = bL_cpufreq_init,
.exit = bL_cpufreq_exit,
.ready = bL_cpufreq_ready,
.attr = cpufreq_generic_attr,
};
......
......@@ -407,8 +407,13 @@ static void cpufreq_ready(struct cpufreq_policy *policy)
* thermal DT code takes care of matching them.
*/
if (of_find_property(np, "#cooling-cells", NULL)) {
priv->cdev = of_cpufreq_cooling_register(np,
policy->related_cpus);
u32 power_coefficient = 0;
of_property_read_u32(np, "dynamic-power-coefficient",
&power_coefficient);
priv->cdev = of_cpufreq_power_cooling_register(np,
policy->related_cpus, power_coefficient, NULL);
if (IS_ERR(priv->cdev)) {
dev_err(priv->cpu_dev,
"running cpufreq without cooling device: %ld\n",
......
......@@ -115,13 +115,13 @@ static void cs_check_cpu(int cpu, unsigned int load)
}
}
static unsigned int cs_dbs_timer(struct cpu_dbs_info *cdbs,
struct dbs_data *dbs_data, bool modify_all)
static unsigned int cs_dbs_timer(struct cpufreq_policy *policy, bool modify_all)
{
struct dbs_data *dbs_data = policy->governor_data;
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
if (modify_all)
dbs_check_cpu(dbs_data, cdbs->shared->policy->cpu);
dbs_check_cpu(dbs_data, policy->cpu);
return delay_for_sampling_rate(cs_tuners->sampling_rate);
}
......
......@@ -158,47 +158,55 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu)
}
EXPORT_SYMBOL_GPL(dbs_check_cpu);
static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data,
unsigned int delay)
void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay)
{
struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
mod_delayed_work_on(cpu, system_wq, &cdbs->dwork, delay);
}
void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
unsigned int delay, bool all_cpus)
{
int i;
struct dbs_data *dbs_data = policy->governor_data;
struct cpu_dbs_info *cdbs;
int cpu;
if (!all_cpus) {
/*
* Use raw_smp_processor_id() to avoid preemptible warnings.
* We know that this is only called with all_cpus == false from
* works that have been queued with *_work_on() functions and
* those works are canceled during CPU_DOWN_PREPARE so they
* can't possibly run on any other CPU.
*/
__gov_queue_work(raw_smp_processor_id(), dbs_data, delay);
} else {
for_each_cpu(i, policy->cpus)
__gov_queue_work(i, dbs_data, delay);
for_each_cpu(cpu, policy->cpus) {
cdbs = dbs_data->cdata->get_cpu_cdbs(cpu);
cdbs->timer.expires = jiffies + delay;
add_timer_on(&cdbs->timer, cpu);
}
}
EXPORT_SYMBOL_GPL(gov_queue_work);
EXPORT_SYMBOL_GPL(gov_add_timers);
static inline void gov_cancel_work(struct dbs_data *dbs_data,
struct cpufreq_policy *policy)
static inline void gov_cancel_timers(struct cpufreq_policy *policy)
{
struct dbs_data *dbs_data = policy->governor_data;
struct cpu_dbs_info *cdbs;
int i;
for_each_cpu(i, policy->cpus) {
cdbs = dbs_data->cdata->get_cpu_cdbs(i);
cancel_delayed_work_sync(&cdbs->dwork);
del_timer_sync(&cdbs->timer);
}
}
void gov_cancel_work(struct cpu_common_dbs_info *shared)
{
/* Tell dbs_timer_handler() to skip queuing up work items. */
atomic_inc(&shared->skip_work);
/*
* If dbs_timer_handler() is already running, it may not notice the
* incremented skip_work, so wait for it to complete to prevent its work
* item from being queued up after the cancel_work_sync() below.
*/
gov_cancel_timers(shared->policy);
/*
* In case dbs_timer_handler() managed to run and spawn a work item
* before the timers have been canceled, wait for that work item to
* complete and then cancel all of the timers set up by it. If
* dbs_timer_handler() runs again at that point, it will see the
* positive value of skip_work and won't spawn any more work items.
*/
cancel_work_sync(&shared->work);
gov_cancel_timers(shared->policy);
atomic_set(&shared->skip_work, 0);
}
EXPORT_SYMBOL_GPL(gov_cancel_work);
/* Will return if we need to evaluate cpu load again or not */
static bool need_load_eval(struct cpu_common_dbs_info *shared,
unsigned int sampling_rate)
......@@ -217,29 +225,21 @@ static bool need_load_eval(struct cpu_common_dbs_info *shared,
return true;
}
static void dbs_timer(struct work_struct *work)
static void dbs_work_handler(struct work_struct *work)
{
struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info,
dwork.work);
struct cpu_common_dbs_info *shared = cdbs->shared;
struct cpu_common_dbs_info *shared = container_of(work, struct
cpu_common_dbs_info, work);
struct cpufreq_policy *policy;
struct dbs_data *dbs_data;
unsigned int sampling_rate, delay;
bool modify_all = true;
mutex_lock(&shared->timer_mutex);
bool eval_load;
policy = shared->policy;
/*
* Governor might already be disabled and there is no point continuing
* with the work-handler.
*/
if (!policy)
goto unlock;
dbs_data = policy->governor_data;
/* Kill all timers */
gov_cancel_timers(policy);
if (dbs_data->cdata->governor == GOV_CONSERVATIVE) {
struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
......@@ -250,14 +250,37 @@ static void dbs_timer(struct work_struct *work)
sampling_rate = od_tuners->sampling_rate;
}
if (!need_load_eval(cdbs->shared, sampling_rate))
modify_all = false;
eval_load = need_load_eval(shared, sampling_rate);
delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all);
gov_queue_work(dbs_data, policy, delay, modify_all);
unlock:
/*
* Make sure cpufreq_governor_limits() isn't evaluating load in
* parallel.
*/
mutex_lock(&shared->timer_mutex);
delay = dbs_data->cdata->gov_dbs_timer(policy, eval_load);
mutex_unlock(&shared->timer_mutex);
atomic_dec(&shared->skip_work);
gov_add_timers(policy, delay);
}
static void dbs_timer_handler(unsigned long data)
{
struct cpu_dbs_info *cdbs = (struct cpu_dbs_info *)data;
struct cpu_common_dbs_info *shared = cdbs->shared;
/*
* Timer handler may not be allowed to queue the work at the moment,
* because:
* - Another timer handler has done that
* - We are stopping the governor
* - Or we are updating the sampling rate of the ondemand governor
*/
if (atomic_inc_return(&shared->skip_work) > 1)
atomic_dec(&shared->skip_work);
else
queue_work(system_wq, &shared->work);
}
static void set_sampling_rate(struct dbs_data *dbs_data,
......@@ -287,6 +310,9 @@ static int alloc_common_dbs_info(struct cpufreq_policy *policy,
for_each_cpu(j, policy->related_cpus)
cdata->get_cpu_cdbs(j)->shared = shared;
mutex_init(&shared->timer_mutex);
atomic_set(&shared->skip_work, 0);
INIT_WORK(&shared->work, dbs_work_handler);
return 0;
}
......@@ -297,6 +323,8 @@ static void free_common_dbs_info(struct cpufreq_policy *policy,
struct cpu_common_dbs_info *shared = cdbs->shared;
int j;
mutex_destroy(&shared->timer_mutex);
for_each_cpu(j, policy->cpus)
cdata->get_cpu_cdbs(j)->shared = NULL;
......@@ -433,7 +461,6 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
shared->policy = policy;
shared->time_stamp = ktime_get();
mutex_init(&shared->timer_mutex);
for_each_cpu(j, policy->cpus) {
struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j);
......@@ -450,7 +477,9 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
if (ignore_nice)
j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE];
INIT_DEFERRABLE_WORK(&j_cdbs->dwork, dbs_timer);
__setup_timer(&j_cdbs->timer, dbs_timer_handler,
(unsigned long)j_cdbs,
TIMER_DEFERRABLE | TIMER_IRQSAFE);
}
if (cdata->governor == GOV_CONSERVATIVE) {
......@@ -468,8 +497,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy,
od_ops->powersave_bias_init_cpu(cpu);
}
gov_queue_work(dbs_data, policy, delay_for_sampling_rate(sampling_rate),
true);
gov_add_timers(policy, delay_for_sampling_rate(sampling_rate));
return 0;
}
......@@ -483,18 +511,9 @@ static int cpufreq_governor_stop(struct cpufreq_policy *policy,
if (!shared || !shared->policy)
return -EBUSY;
/*
* Work-handler must see this updated, as it should not proceed any
* further after governor is disabled. And so timer_mutex is taken while
* updating this value.
*/
mutex_lock(&shared->timer_mutex);
gov_cancel_work(shared);
shared->policy = NULL;
mutex_unlock(&shared->timer_mutex);
gov_cancel_work(dbs_data, policy);
mutex_destroy(&shared->timer_mutex);
return 0;
}
......
......@@ -17,6 +17,7 @@
#ifndef _CPUFREQ_GOVERNOR_H
#define _CPUFREQ_GOVERNOR_H
#include <linux/atomic.h>
#include <linux/cpufreq.h>
#include <linux/kernel_stat.h>
#include <linux/module.h>
......@@ -132,12 +133,14 @@ static void *get_cpu_dbs_info_s(int cpu) \
struct cpu_common_dbs_info {
struct cpufreq_policy *policy;
/*
* percpu mutex that serializes governor limit change with dbs_timer
* invocation. We do not want dbs_timer to run when user is changing
* the governor or limits.
* Per policy mutex that serializes load evaluation from limit-change
* and work-handler.
*/
struct mutex timer_mutex;
ktime_t time_stamp;
atomic_t skip_work;
struct work_struct work;
};
/* Per cpu structures */
......@@ -152,7 +155,7 @@ struct cpu_dbs_info {
* wake-up from idle.
*/
unsigned int prev_load;
struct delayed_work dwork;
struct timer_list timer;
struct cpu_common_dbs_info *shared;
};
......@@ -209,8 +212,7 @@ struct common_dbs_data {
struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu);
void *(*get_cpu_dbs_info_s)(int cpu);
unsigned int (*gov_dbs_timer)(struct cpu_dbs_info *cdbs,
struct dbs_data *dbs_data,
unsigned int (*gov_dbs_timer)(struct cpufreq_policy *policy,
bool modify_all);
void (*gov_check_cpu)(int cpu, unsigned int load);
int (*init)(struct dbs_data *dbs_data, bool notify);
......@@ -269,11 +271,11 @@ static ssize_t show_sampling_rate_min_gov_pol \
extern struct mutex cpufreq_governor_lock;
void gov_add_timers(struct cpufreq_policy *policy, unsigned int delay);
void gov_cancel_work(struct cpu_common_dbs_info *shared);
void dbs_check_cpu(struct dbs_data *dbs_data, int cpu);
int cpufreq_governor_dbs(struct cpufreq_policy *policy,
struct common_dbs_data *cdata, unsigned int event);
void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy,
unsigned int delay, bool all_cpus);
void od_register_powersave_bias_handler(unsigned int (*f)
(struct cpufreq_policy *, unsigned int, unsigned int),
unsigned int powersave_bias);
......
......@@ -191,10 +191,9 @@ static void od_check_cpu(int cpu, unsigned int load)
}
}
static unsigned int od_dbs_timer(struct cpu_dbs_info *cdbs,
struct dbs_data *dbs_data, bool modify_all)
static unsigned int od_dbs_timer(struct cpufreq_policy *policy, bool modify_all)
{
struct cpufreq_policy *policy = cdbs->shared->policy;
struct dbs_data *dbs_data = policy->governor_data;
unsigned int cpu = policy->cpu;
struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
cpu);
......@@ -247,40 +246,66 @@ static void update_sampling_rate(struct dbs_data *dbs_data,
unsigned int new_rate)
{
struct od_dbs_tuners *od_tuners = dbs_data->tuners;
struct cpumask cpumask;
int cpu;
od_tuners->sampling_rate = new_rate = max(new_rate,
dbs_data->min_sampling_rate);
for_each_online_cpu(cpu) {
/*
* Lock governor so that governor start/stop can't execute in parallel.
*/
mutex_lock(&od_dbs_cdata.mutex);
cpumask_copy(&cpumask, cpu_online_mask);
for_each_cpu(cpu, &cpumask) {
struct cpufreq_policy *policy;
struct od_cpu_dbs_info_s *dbs_info;
struct cpu_dbs_info *cdbs;
struct cpu_common_dbs_info *shared;
unsigned long next_sampling, appointed_at;
policy = cpufreq_cpu_get(cpu);
if (!policy)
continue;
if (policy->governor != &cpufreq_gov_ondemand) {
cpufreq_cpu_put(policy);
continue;
}
dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
cpufreq_cpu_put(policy);
cdbs = &dbs_info->cdbs;
shared = cdbs->shared;
if (!delayed_work_pending(&dbs_info->cdbs.dwork))
/*
* A valid shared and shared->policy means governor hasn't
* stopped or exited yet.
*/
if (!shared || !shared->policy)
continue;
policy = shared->policy;
/* clear all CPUs of this policy */
cpumask_andnot(&cpumask, &cpumask, policy->cpus);
/*
* Update sampling rate for CPUs whose policy is governed by
* dbs_data. In case of governor_per_policy, only a single
* policy will be governed by dbs_data, otherwise there can be
* multiple policies that are governed by the same dbs_data.
*/
if (dbs_data != policy->governor_data)
continue;
/*
* Checking this for any CPU should be fine, timers for all of
* them are scheduled together.
*/
next_sampling = jiffies + usecs_to_jiffies(new_rate);
appointed_at = dbs_info->cdbs.dwork.timer.expires;
appointed_at = dbs_info->cdbs.timer.expires;
if (time_before(next_sampling, appointed_at)) {
cancel_delayed_work_sync(&dbs_info->cdbs.dwork);
gov_queue_work(dbs_data, policy,
usecs_to_jiffies(new_rate), true);
gov_cancel_work(shared);
gov_add_timers(policy, usecs_to_jiffies(new_rate));
}
}
mutex_unlock(&od_dbs_cdata.mutex);
}
static ssize_t store_sampling_rate(struct dbs_data *dbs_data, const char *buf,
......
......@@ -66,6 +66,7 @@ static inline int ceiling_fp(int32_t x)
struct sample {
int32_t core_pct_busy;
int32_t busy_scaled;
u64 aperf;
u64 mperf;
u64 tsc;
......@@ -112,6 +113,7 @@ struct cpudata {
u64 prev_aperf;
u64 prev_mperf;
u64 prev_tsc;
u64 prev_cummulative_iowait;
struct sample sample;
};
......@@ -133,6 +135,7 @@ struct pstate_funcs {
int (*get_scaling)(void);
void (*set)(struct cpudata*, int pstate);
void (*get_vid)(struct cpudata *);
int32_t (*get_target_pstate)(struct cpudata *);
};
struct cpu_defaults {
......@@ -140,6 +143,9 @@ struct cpu_defaults {
struct pstate_funcs funcs;
};
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu);
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu);
static struct pstate_adjust_policy pid_params;
static struct pstate_funcs pstate_funcs;
static int hwp_active;
......@@ -738,6 +744,7 @@ static struct cpu_defaults core_params = {
.get_turbo = core_get_turbo_pstate,
.get_scaling = core_get_scaling,
.set = core_set_pstate,
.get_target_pstate = get_target_pstate_use_performance,
},
};
......@@ -758,6 +765,7 @@ static struct cpu_defaults silvermont_params = {
.set = atom_set_pstate,
.get_scaling = silvermont_get_scaling,
.get_vid = atom_get_vid,
.get_target_pstate = get_target_pstate_use_cpu_load,
},
};
......@@ -778,6 +786,7 @@ static struct cpu_defaults airmont_params = {
.set = atom_set_pstate,
.get_scaling = airmont_get_scaling,
.get_vid = atom_get_vid,
.get_target_pstate = get_target_pstate_use_cpu_load,
},
};
......@@ -797,6 +806,7 @@ static struct cpu_defaults knl_params = {
.get_turbo = knl_get_turbo_pstate,
.get_scaling = core_get_scaling,
.set = core_set_pstate,
.get_target_pstate = get_target_pstate_use_performance,
},
};
......@@ -882,12 +892,11 @@ static inline void intel_pstate_sample(struct cpudata *cpu)
local_irq_save(flags);
rdmsrl(MSR_IA32_APERF, aperf);
rdmsrl(MSR_IA32_MPERF, mperf);
if (cpu->prev_mperf == mperf) {
tsc = rdtsc();
if ((cpu->prev_mperf == mperf) || (cpu->prev_tsc == tsc)) {
local_irq_restore(flags);
return;
}
tsc = rdtsc();
local_irq_restore(flags);
cpu->last_sample_time = cpu->sample.time;
......@@ -922,7 +931,43 @@ static inline void intel_pstate_set_sample_time(struct cpudata *cpu)
mod_timer_pinned(&cpu->timer, jiffies + delay);
}
static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
{
struct sample *sample = &cpu->sample;
u64 cummulative_iowait, delta_iowait_us;
u64 delta_iowait_mperf;
u64 mperf, now;
int32_t cpu_load;
cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now);
/*
* Convert iowait time into number of IO cycles spent at max_freq.
* IO is considered as busy only for the cpu_load algorithm. For
* performance this is not needed since we always try to reach the
* maximum P-State, so we are already boosting the IOs.
*/
delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait;
delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling *
cpu->pstate.max_pstate, MSEC_PER_SEC);
mperf = cpu->sample.mperf + delta_iowait_mperf;
cpu->prev_cummulative_iowait = cummulative_iowait;
/*
* The load can be estimated as the ratio of the mperf counter
* running at a constant frequency during active periods
* (C0) and the time stamp counter running at the same frequency
* also during C-states.
*/
cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
cpu->sample.busy_scaled = cpu_load;
return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load);
}
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
{
int32_t core_busy, max_pstate, current_pstate, sample_ratio;
s64 duration_us;
......@@ -960,30 +1005,24 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
core_busy = mul_fp(core_busy, sample_ratio);
}
return core_busy;
cpu->sample.busy_scaled = core_busy;
return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
}
static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
{
int32_t busy_scaled;
struct _pid *pid;
signed int ctl;
int from;
int from, target_pstate;
struct sample *sample;
from = cpu->pstate.current_pstate;
pid = &cpu->pid;
busy_scaled = intel_pstate_get_scaled_busy(cpu);
target_pstate = pstate_funcs.get_target_pstate(cpu);
ctl = pid_calc(pid, busy_scaled);
/* Negative values of ctl increase the pstate and vice versa */
intel_pstate_set_pstate(cpu, cpu->pstate.current_pstate - ctl, true);
intel_pstate_set_pstate(cpu, target_pstate, true);
sample = &cpu->sample;
trace_pstate_sample(fp_toint(sample->core_pct_busy),
fp_toint(busy_scaled),
fp_toint(sample->busy_scaled),
from,
cpu->pstate.current_pstate,
sample->mperf,
......@@ -1237,6 +1276,8 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
pstate_funcs.get_scaling = funcs->get_scaling;
pstate_funcs.set = funcs->set;
pstate_funcs.get_vid = funcs->get_vid;
pstate_funcs.get_target_pstate = funcs->get_target_pstate;
}
#if IS_ENABLED(CONFIG_ACPI)
......
......@@ -41,16 +41,35 @@
* the original PLL becomes stable at target frequency.
*/
struct mtk_cpu_dvfs_info {
struct cpumask cpus;
struct device *cpu_dev;
struct regulator *proc_reg;
struct regulator *sram_reg;
struct clk *cpu_clk;
struct clk *inter_clk;
struct thermal_cooling_device *cdev;
struct list_head list_head;
int intermediate_voltage;
bool need_voltage_tracking;
};
static LIST_HEAD(dvfs_info_list);
static struct mtk_cpu_dvfs_info *mtk_cpu_dvfs_info_lookup(int cpu)
{
struct mtk_cpu_dvfs_info *info;
struct list_head *list;
list_for_each(list, &dvfs_info_list) {
info = list_entry(list, struct mtk_cpu_dvfs_info, list_head);
if (cpumask_test_cpu(cpu, &info->cpus))
return info;
}
return NULL;
}
static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info,
int new_vproc)
{
......@@ -59,7 +78,10 @@ static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info,
int old_vproc, old_vsram, new_vsram, vsram, vproc, ret;
old_vproc = regulator_get_voltage(proc_reg);
old_vsram = regulator_get_voltage(sram_reg);
if (old_vproc < 0) {
pr_err("%s: invalid Vproc value: %d\n", __func__, old_vproc);
return old_vproc;
}
/* Vsram should not exceed the maximum allowed voltage of SoC. */
new_vsram = min(new_vproc + MIN_VOLT_SHIFT, MAX_VOLT_LIMIT);
......@@ -72,7 +94,17 @@ static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info,
*/
do {
old_vsram = regulator_get_voltage(sram_reg);
if (old_vsram < 0) {
pr_err("%s: invalid Vsram value: %d\n",
__func__, old_vsram);
return old_vsram;
}
old_vproc = regulator_get_voltage(proc_reg);
if (old_vproc < 0) {
pr_err("%s: invalid Vproc value: %d\n",
__func__, old_vproc);
return old_vproc;
}
vsram = min(new_vsram, old_vproc + MAX_VOLT_SHIFT);
......@@ -117,7 +149,17 @@ static int mtk_cpufreq_voltage_tracking(struct mtk_cpu_dvfs_info *info,
*/
do {
old_vproc = regulator_get_voltage(proc_reg);
if (old_vproc < 0) {
pr_err("%s: invalid Vproc value: %d\n",
__func__, old_vproc);
return old_vproc;
}
old_vsram = regulator_get_voltage(sram_reg);
if (old_vsram < 0) {
pr_err("%s: invalid Vsram value: %d\n",
__func__, old_vsram);
return old_vsram;
}
vproc = max(new_vproc, old_vsram - MAX_VOLT_SHIFT);
ret = regulator_set_voltage(proc_reg, vproc,
......@@ -185,6 +227,10 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
old_freq_hz = clk_get_rate(cpu_clk);
old_vproc = regulator_get_voltage(info->proc_reg);
if (old_vproc < 0) {
pr_err("%s: invalid Vproc value: %d\n", __func__, old_vproc);
return old_vproc;
}
freq_hz = freq_table[index].frequency * 1000;
......@@ -375,6 +421,9 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu)
*/
info->need_voltage_tracking = !IS_ERR(sram_reg);
/* CPUs in the same cluster share a clock and power domain. */
cpumask_copy(&info->cpus, &cpu_topology[cpu].core_sibling);
return 0;
out_free_opp_table:
......@@ -413,22 +462,18 @@ static int mtk_cpufreq_init(struct cpufreq_policy *policy)
struct cpufreq_frequency_table *freq_table;
int ret;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
ret = mtk_cpu_dvfs_info_init(info, policy->cpu);
if (ret) {
pr_err("%s failed to initialize dvfs info for cpu%d\n",
__func__, policy->cpu);
goto out_free_dvfs_info;
info = mtk_cpu_dvfs_info_lookup(policy->cpu);
if (!info) {
pr_err("dvfs info for cpu%d is not initialized.\n",
policy->cpu);
return -EINVAL;
}
ret = dev_pm_opp_init_cpufreq_table(info->cpu_dev, &freq_table);
if (ret) {
pr_err("failed to init cpufreq table for cpu%d: %d\n",
policy->cpu, ret);
goto out_release_dvfs_info;
return ret;
}
ret = cpufreq_table_validate_and_show(policy, freq_table);
......@@ -437,8 +482,7 @@ static int mtk_cpufreq_init(struct cpufreq_policy *policy)
goto out_free_cpufreq_table;
}
/* CPUs in the same cluster share a clock and power domain. */
cpumask_copy(policy->cpus, &cpu_topology[policy->cpu].core_sibling);
cpumask_copy(policy->cpus, &info->cpus);
policy->driver_data = info;
policy->clk = info->cpu_clk;
......@@ -446,13 +490,6 @@ static int mtk_cpufreq_init(struct cpufreq_policy *policy)
out_free_cpufreq_table:
dev_pm_opp_free_cpufreq_table(info->cpu_dev, &freq_table);
out_release_dvfs_info:
mtk_cpu_dvfs_info_release(info);
out_free_dvfs_info:
kfree(info);
return ret;
}
......@@ -462,14 +499,13 @@ static int mtk_cpufreq_exit(struct cpufreq_policy *policy)
cpufreq_cooling_unregister(info->cdev);
dev_pm_opp_free_cpufreq_table(info->cpu_dev, &policy->freq_table);
mtk_cpu_dvfs_info_release(info);
kfree(info);
return 0;
}
static struct cpufreq_driver mt8173_cpufreq_driver = {
.flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK,
.flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK |
CPUFREQ_HAVE_GOVERNOR_PER_POLICY,
.verify = cpufreq_generic_frequency_table_verify,
.target_index = mtk_cpufreq_set_target,
.get = cpufreq_generic_get,
......@@ -482,11 +518,47 @@ static struct cpufreq_driver mt8173_cpufreq_driver = {
static int mt8173_cpufreq_probe(struct platform_device *pdev)
{
int ret;
struct mtk_cpu_dvfs_info *info;
struct list_head *list, *tmp;
int cpu, ret;
for_each_possible_cpu(cpu) {
info = mtk_cpu_dvfs_info_lookup(cpu);
if (info)
continue;
info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
if (!info) {
ret = -ENOMEM;
goto release_dvfs_info_list;
}
ret = mtk_cpu_dvfs_info_init(info, cpu);
if (ret) {
dev_err(&pdev->dev,
"failed to initialize dvfs info for cpu%d\n",
cpu);
goto release_dvfs_info_list;
}
list_add(&info->list_head, &dvfs_info_list);
}
ret = cpufreq_register_driver(&mt8173_cpufreq_driver);
if (ret)
pr_err("failed to register mtk cpufreq driver\n");
if (ret) {
dev_err(&pdev->dev, "failed to register mtk cpufreq driver\n");
goto release_dvfs_info_list;
}
return 0;
release_dvfs_info_list:
list_for_each_safe(list, tmp, &dvfs_info_list) {
info = list_entry(list, struct mtk_cpu_dvfs_info, list_head);
mtk_cpu_dvfs_info_release(info);
list_del(list);
}
return ret;
}
......
......@@ -555,6 +555,8 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
policy->min = policy->cpuinfo.min_freq =
ioread32(&pcch_hdr->minimum_frequency) * 1000;
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
pr_debug("init: policy->max is %d, policy->min is %d\n",
policy->max, policy->min);
out:
......
......@@ -12,6 +12,7 @@
#include <linux/clk.h>
#include <linux/cpufreq.h>
#include <linux/cpu_cooling.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
......@@ -33,6 +34,7 @@
struct cpu_data {
struct clk **pclk;
struct cpufreq_frequency_table *table;
struct thermal_cooling_device *cdev;
};
/**
......@@ -321,6 +323,27 @@ static int qoriq_cpufreq_target(struct cpufreq_policy *policy,
return clk_set_parent(policy->clk, parent);
}
static void qoriq_cpufreq_ready(struct cpufreq_policy *policy)
{
struct cpu_data *cpud = policy->driver_data;
struct device_node *np = of_get_cpu_node(policy->cpu, NULL);
if (of_find_property(np, "#cooling-cells", NULL)) {
cpud->cdev = of_cpufreq_cooling_register(np,
policy->related_cpus);
if (IS_ERR(cpud->cdev)) {
pr_err("Failed to register cooling device cpu%d: %ld\n",
policy->cpu, PTR_ERR(cpud->cdev));
cpud->cdev = NULL;
}
}
of_node_put(np);
}
static struct cpufreq_driver qoriq_cpufreq_driver = {
.name = "qoriq_cpufreq",
.flags = CPUFREQ_CONST_LOOPS,
......@@ -329,6 +352,7 @@ static struct cpufreq_driver qoriq_cpufreq_driver = {
.verify = cpufreq_generic_frequency_table_verify,
.target_index = qoriq_cpufreq_target,
.get = cpufreq_generic_get,
.ready = qoriq_cpufreq_ready,
.attr = cpufreq_generic_attr,
};
......
/*
* Match running platform with pre-defined OPP values for CPUFreq
*
* Author: Ajit Pal Singh <ajitpal.singh@st.com>
* Lee Jones <lee.jones@linaro.org>
*
* Copyright (C) 2015 STMicroelectronics (R&D) Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the version 2 of the GNU General Public License as
* published by the Free Software Foundation
*/
#include <linux/cpu.h>
#include <linux/io.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/pm_opp.h>
#include <linux/regmap.h>
#define VERSION_ELEMENTS 3
#define MAX_PCODE_NAME_LEN 7
#define VERSION_SHIFT 28
#define HW_INFO_INDEX 1
#define MAJOR_ID_INDEX 1
#define MINOR_ID_INDEX 2
/*
* Only match on "suitable for ALL versions" entries
*
* This will be used with the BIT() macro. It sets the
* top bit of a 32bit value and is equal to 0x80000000.
*/
#define DEFAULT_VERSION 31
enum {
PCODE = 0,
SUBSTRATE,
DVFS_MAX_REGFIELDS,
};
/**
* ST CPUFreq Driver Data
*
* @cpu_node CPU's OF node
* @syscfg_eng Engineering Syscon register map
* @regmap Syscon register map
*/
static struct sti_cpufreq_ddata {
struct device *cpu;
struct regmap *syscfg_eng;
struct regmap *syscfg;
} ddata;
static int sti_cpufreq_fetch_major(void) {
struct device_node *np = ddata.cpu->of_node;
struct device *dev = ddata.cpu;
unsigned int major_offset;
unsigned int socid;
int ret;
ret = of_property_read_u32_index(np, "st,syscfg",
MAJOR_ID_INDEX, &major_offset);
if (ret) {
dev_err(dev, "No major number offset provided in %s [%d]\n",
np->full_name, ret);
return ret;
}
ret = regmap_read(ddata.syscfg, major_offset, &socid);
if (ret) {
dev_err(dev, "Failed to read major number from syscon [%d]\n",
ret);
return ret;
}
return ((socid >> VERSION_SHIFT) & 0xf) + 1;
}
static int sti_cpufreq_fetch_minor(void)
{
struct device *dev = ddata.cpu;
struct device_node *np = dev->of_node;
unsigned int minor_offset;
unsigned int minid;
int ret;
ret = of_property_read_u32_index(np, "st,syscfg-eng",
MINOR_ID_INDEX, &minor_offset);
if (ret) {
dev_err(dev,
"No minor number offset provided %s [%d]\n",
np->full_name, ret);
return ret;
}
ret = regmap_read(ddata.syscfg_eng, minor_offset, &minid);
if (ret) {
dev_err(dev,
"Failed to read the minor number from syscon [%d]\n",
ret);
return ret;
}
return minid & 0xf;
}
static int sti_cpufreq_fetch_regmap_field(const struct reg_field *reg_fields,
int hw_info_offset, int field)
{
struct regmap_field *regmap_field;
struct reg_field reg_field = reg_fields[field];
struct device *dev = ddata.cpu;
unsigned int value;
int ret;
reg_field.reg = hw_info_offset;
regmap_field = devm_regmap_field_alloc(dev,
ddata.syscfg_eng,
reg_field);
if (IS_ERR(regmap_field)) {
dev_err(dev, "Failed to allocate reg field\n");
return PTR_ERR(regmap_field);
}
ret = regmap_field_read(regmap_field, &value);
if (ret) {
dev_err(dev, "Failed to read %s code\n",
field ? "SUBSTRATE" : "PCODE");
return ret;
}
return value;
}
static const struct reg_field sti_stih407_dvfs_regfields[DVFS_MAX_REGFIELDS] = {
[PCODE] = REG_FIELD(0, 16, 19),
[SUBSTRATE] = REG_FIELD(0, 0, 2),
};
static const struct reg_field *sti_cpufreq_match(void)
{
if (of_machine_is_compatible("st,stih407") ||
of_machine_is_compatible("st,stih410"))
return sti_stih407_dvfs_regfields;
return NULL;
}
static int sti_cpufreq_set_opp_info(void)
{
struct device *dev = ddata.cpu;
struct device_node *np = dev->of_node;
const struct reg_field *reg_fields;
unsigned int hw_info_offset;
unsigned int version[VERSION_ELEMENTS];
int pcode, substrate, major, minor;
int ret;
char name[MAX_PCODE_NAME_LEN];
reg_fields = sti_cpufreq_match();
if (!reg_fields) {
dev_err(dev, "This SoC doesn't support voltage scaling");
return -ENODEV;
}
ret = of_property_read_u32_index(np, "st,syscfg-eng",
HW_INFO_INDEX, &hw_info_offset);
if (ret) {
dev_warn(dev, "Failed to read HW info offset from DT\n");
substrate = DEFAULT_VERSION;
pcode = 0;
goto use_defaults;
}
pcode = sti_cpufreq_fetch_regmap_field(reg_fields,
hw_info_offset,
PCODE);
if (pcode < 0) {
dev_warn(dev, "Failed to obtain process code\n");
/* Use default pcode */
pcode = 0;
}
substrate = sti_cpufreq_fetch_regmap_field(reg_fields,
hw_info_offset,
SUBSTRATE);
if (substrate) {
dev_warn(dev, "Failed to obtain substrate code\n");
/* Use default substrate */
substrate = DEFAULT_VERSION;
}
use_defaults:
major = sti_cpufreq_fetch_major();
if (major < 0) {
dev_err(dev, "Failed to obtain major version\n");
/* Use default major number */
major = DEFAULT_VERSION;
}
minor = sti_cpufreq_fetch_minor();
if (minor < 0) {
dev_err(dev, "Failed to obtain minor version\n");
/* Use default minor number */
minor = DEFAULT_VERSION;
}
snprintf(name, MAX_PCODE_NAME_LEN, "pcode%d", pcode);
ret = dev_pm_opp_set_prop_name(dev, name);
if (ret) {
dev_err(dev, "Failed to set prop name\n");
return ret;
}
version[0] = BIT(major);
version[1] = BIT(minor);
version[2] = BIT(substrate);
ret = dev_pm_opp_set_supported_hw(dev, version, VERSION_ELEMENTS);
if (ret) {
dev_err(dev, "Failed to set supported hardware\n");
return ret;
}
dev_dbg(dev, "pcode: %d major: %d minor: %d substrate: %d\n",
pcode, major, minor, substrate);
dev_dbg(dev, "version[0]: %x version[1]: %x version[2]: %x\n",
version[0], version[1], version[2]);
return 0;
}
static int sti_cpufreq_fetch_syscon_regsiters(void)
{
struct device *dev = ddata.cpu;
struct device_node *np = dev->of_node;
ddata.syscfg = syscon_regmap_lookup_by_phandle(np, "st,syscfg");
if (IS_ERR(ddata.syscfg)) {
dev_err(dev, "\"st,syscfg\" not supplied\n");
return PTR_ERR(ddata.syscfg);
}
ddata.syscfg_eng = syscon_regmap_lookup_by_phandle(np, "st,syscfg-eng");
if (IS_ERR(ddata.syscfg_eng)) {
dev_err(dev, "\"st,syscfg-eng\" not supplied\n");
return PTR_ERR(ddata.syscfg_eng);
}
return 0;
}
static int sti_cpufreq_init(void)
{
int ret;
ddata.cpu = get_cpu_device(0);
if (!ddata.cpu) {
dev_err(ddata.cpu, "Failed to get device for CPU0\n");
goto skip_voltage_scaling;
}
if (!of_get_property(ddata.cpu->of_node, "operating-points-v2", NULL)) {
dev_err(ddata.cpu, "OPP-v2 not supported\n");
goto skip_voltage_scaling;
}
ret = sti_cpufreq_fetch_syscon_regsiters();
if (ret)
goto skip_voltage_scaling;
ret = sti_cpufreq_set_opp_info();
if (!ret)
goto register_cpufreq_dt;
skip_voltage_scaling:
dev_err(ddata.cpu, "Not doing voltage scaling\n");
register_cpufreq_dt:
platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
return 0;
}
module_init(sti_cpufreq_init);
MODULE_DESCRIPTION("STMicroelectronics CPUFreq/OPP driver");
MODULE_AUTHOR("Ajitpal Singh <ajitpal.singh@st.com>");
MODULE_AUTHOR("Lee Jones <lee.jones@linaro.org>");
MODULE_LICENSE("GPL v2");
......@@ -55,6 +55,11 @@ int dev_pm_opp_enable(struct device *dev, unsigned long freq);
int dev_pm_opp_disable(struct device *dev, unsigned long freq);
struct srcu_notifier_head *dev_pm_opp_get_notifier(struct device *dev);
int dev_pm_opp_set_supported_hw(struct device *dev, const u32 *versions,
unsigned int count);
void dev_pm_opp_put_supported_hw(struct device *dev);
int dev_pm_opp_set_prop_name(struct device *dev, const char *name);
void dev_pm_opp_put_prop_name(struct device *dev);
#else
static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
{
......@@ -129,6 +134,23 @@ static inline struct srcu_notifier_head *dev_pm_opp_get_notifier(
{
return ERR_PTR(-EINVAL);
}
static inline int dev_pm_opp_set_supported_hw(struct device *dev,
const u32 *versions,
unsigned int count)
{
return -EINVAL;
}
static inline void dev_pm_opp_put_supported_hw(struct device *dev) {}
static inline int dev_pm_opp_set_prop_name(struct device *dev, const char *name)
{
return -EINVAL;
}
static inline void dev_pm_opp_put_prop_name(struct device *dev) {}
#endif /* CONFIG_PM_OPP */
#if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment