Commit 26c92a38 authored by Linus Torvalds

Merge tag 'pm-4.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull power management updates from Rafael Wysocki:
 "These are mostly fixes, including some fixes for changes made during
  the recent merge window and some "stable" material, plus some minor
  extensions of the turbostat utility.

  Specifics:

   - Fix the PM core to avoid introducing a runtime PM usage counter
     imbalance when adding device links during driver probe (Rafael
     Wysocki).

   - Fix the operating performance points (OPP) framework to ensure that
     the regulator voltage is always updated as appropriate when
     updating clock rates (Waldemar Rymarkiewicz).

   - Fix the intel_pstate driver to use correct max/min limits for cores
     with differing maximum frequencies (Srinivas Pandruvada).

   - Fix a typo in the intel_pstate driver documentation (Rafael
     Wysocki).

   - Fix two issues with the recently added Kryo cpufreq driver (Ilia
     Lin).

   - Fix two recent regressions and some other minor issues in the
     turbostat utility and extend it to provide some more diagnostic
     information (Len Brown, Nathan Ciobanu)"

* tag 'pm-4.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  Documentation: intel_pstate: Fix typo
  tools/power turbostat: version 18.06.20
  tools/power turbostat: add the missing command line switches
  tools/power turbostat: add single character tokens to help
  tools/power turbostat: alphabetize the help output
  tools/power turbostat: fix segfault on 'no node' machines
  tools/power turbostat: add optional APIC X2APIC columns
  tools/power turbostat: decode cpuid.1.HT
  tools/power turbostat: fix show/hide issues resulting from mis-merge
  PM / OPP: Update voltage in case freq == old_freq
  cpufreq: intel_pstate: Fix scaling max/min limits with Turbo 3.0
  cpufreq: kryo: Add module remove and exit
  cpufreq: kryo: Fix possible error code dereference
  PM / core: Fix supplier device runtime PM usage counter imbalance
parents 1abd8a8f b51e0013
...@@ -410,7 +410,7 @@ argument is passed to the kernel in the command line. ...@@ -410,7 +410,7 @@ argument is passed to the kernel in the command line.
That only is supported in some configurations, though (for example, if That only is supported in some configurations, though (for example, if
the `HWP feature is enabled in the processor <Active Mode With HWP_>`_, the `HWP feature is enabled in the processor <Active Mode With HWP_>`_,
the operation mode of the driver cannot be changed), and if it is not the operation mode of the driver cannot be changed), and if it is not
supported in the current configuration, writes to this attribute with supported in the current configuration, writes to this attribute will
fail with an appropriate error. fail with an appropriate error.
Interpretation of Policy Attributes Interpretation of Policy Attributes
......
...@@ -236,6 +236,13 @@ struct device_link *device_link_add(struct device *consumer, ...@@ -236,6 +236,13 @@ struct device_link *device_link_add(struct device *consumer,
link->rpm_active = true; link->rpm_active = true;
} }
pm_runtime_new_link(consumer); pm_runtime_new_link(consumer);
/*
* If the link is being added by the consumer driver at probe
* time, balance the decrementation of the supplier's runtime PM
* usage counter after consumer probe in driver_probe_device().
*/
if (consumer->links.status == DL_DEV_PROBING)
pm_runtime_get_noresume(supplier);
} }
get_device(supplier); get_device(supplier);
link->supplier = supplier; link->supplier = supplier;
...@@ -255,12 +262,12 @@ struct device_link *device_link_add(struct device *consumer, ...@@ -255,12 +262,12 @@ struct device_link *device_link_add(struct device *consumer,
switch (consumer->links.status) { switch (consumer->links.status) {
case DL_DEV_PROBING: case DL_DEV_PROBING:
/* /*
* Balance the decrementation of the supplier's * Some callers expect the link creation during
* runtime PM usage counter after consumer probe * consumer driver probe to resume the supplier
* in driver_probe_device(). * even without DL_FLAG_RPM_ACTIVE.
*/ */
if (flags & DL_FLAG_PM_RUNTIME) if (flags & DL_FLAG_PM_RUNTIME)
pm_runtime_get_sync(supplier); pm_runtime_resume(supplier);
link->status = DL_STATE_CONSUMER_PROBE; link->status = DL_STATE_CONSUMER_PROBE;
break; break;
......
...@@ -294,6 +294,7 @@ struct pstate_funcs { ...@@ -294,6 +294,7 @@ struct pstate_funcs {
static struct pstate_funcs pstate_funcs __read_mostly; static struct pstate_funcs pstate_funcs __read_mostly;
static int hwp_active __read_mostly; static int hwp_active __read_mostly;
static int hwp_mode_bdw __read_mostly;
static bool per_cpu_limits __read_mostly; static bool per_cpu_limits __read_mostly;
static bool hwp_boost __read_mostly; static bool hwp_boost __read_mostly;
...@@ -1413,7 +1414,15 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) ...@@ -1413,7 +1414,15 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
cpu->pstate.scaling = pstate_funcs.get_scaling(); cpu->pstate.scaling = pstate_funcs.get_scaling();
cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling; cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
if (hwp_active && !hwp_mode_bdw) {
unsigned int phy_max, current_max;
intel_pstate_get_hwp_max(cpu->cpu, &phy_max, &current_max);
cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
} else {
cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
}
if (pstate_funcs.get_aperf_mperf_shift) if (pstate_funcs.get_aperf_mperf_shift)
cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift(); cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
...@@ -2467,28 +2476,36 @@ static inline bool intel_pstate_has_acpi_ppc(void) { return false; } ...@@ -2467,28 +2476,36 @@ static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
static inline void intel_pstate_request_control_from_smm(void) {} static inline void intel_pstate_request_control_from_smm(void) {}
#endif /* CONFIG_ACPI */ #endif /* CONFIG_ACPI */
#define INTEL_PSTATE_HWP_BROADWELL 0x01
#define ICPU_HWP(model, hwp_mode) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_HWP, hwp_mode }
static const struct x86_cpu_id hwp_support_ids[] __initconst = { static const struct x86_cpu_id hwp_support_ids[] __initconst = {
{ X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP }, ICPU_HWP(INTEL_FAM6_BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL),
ICPU_HWP(INTEL_FAM6_BROADWELL_XEON_D, INTEL_PSTATE_HWP_BROADWELL),
ICPU_HWP(X86_MODEL_ANY, 0),
{} {}
}; };
static int __init intel_pstate_init(void) static int __init intel_pstate_init(void)
{ {
const struct x86_cpu_id *id;
int rc; int rc;
if (no_load) if (no_load)
return -ENODEV; return -ENODEV;
if (x86_match_cpu(hwp_support_ids)) { id = x86_match_cpu(hwp_support_ids);
if (id) {
copy_cpu_funcs(&core_funcs); copy_cpu_funcs(&core_funcs);
if (!no_hwp) { if (!no_hwp) {
hwp_active++; hwp_active++;
hwp_mode_bdw = id->driver_data;
intel_pstate.attr = hwp_cpufreq_attrs; intel_pstate.attr = hwp_cpufreq_attrs;
goto hwp_cpu_matched; goto hwp_cpu_matched;
} }
} else { } else {
const struct x86_cpu_id *id;
id = x86_match_cpu(intel_pstate_cpu_ids); id = x86_match_cpu(intel_pstate_cpu_ids);
if (!id) if (!id)
return -ENODEV; return -ENODEV;
......
...@@ -42,6 +42,8 @@ enum _msm8996_version { ...@@ -42,6 +42,8 @@ enum _msm8996_version {
NUM_OF_MSM8996_VERSIONS, NUM_OF_MSM8996_VERSIONS,
}; };
struct platform_device *cpufreq_dt_pdev, *kryo_cpufreq_pdev;
static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void) static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void)
{ {
size_t len; size_t len;
...@@ -74,7 +76,6 @@ static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void) ...@@ -74,7 +76,6 @@ static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void)
static int qcom_cpufreq_kryo_probe(struct platform_device *pdev) static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
{ {
struct opp_table *opp_tables[NR_CPUS] = {0}; struct opp_table *opp_tables[NR_CPUS] = {0};
struct platform_device *cpufreq_dt_pdev;
enum _msm8996_version msm8996_version; enum _msm8996_version msm8996_version;
struct nvmem_cell *speedbin_nvmem; struct nvmem_cell *speedbin_nvmem;
struct device_node *np; struct device_node *np;
...@@ -115,6 +116,8 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev) ...@@ -115,6 +116,8 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
speedbin = nvmem_cell_read(speedbin_nvmem, &len); speedbin = nvmem_cell_read(speedbin_nvmem, &len);
nvmem_cell_put(speedbin_nvmem); nvmem_cell_put(speedbin_nvmem);
if (IS_ERR(speedbin))
return PTR_ERR(speedbin);
switch (msm8996_version) { switch (msm8996_version) {
case MSM8996_V3: case MSM8996_V3:
...@@ -127,6 +130,7 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev) ...@@ -127,6 +130,7 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
BUG(); BUG();
break; break;
} }
kfree(speedbin);
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
cpu_dev = get_cpu_device(cpu); cpu_dev = get_cpu_device(cpu);
...@@ -162,8 +166,15 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev) ...@@ -162,8 +166,15 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
return ret; return ret;
} }
/*
 * Platform driver .remove callback.
 *
 * Unregisters the platform device held in the file-scope pointer
 * cpufreq_dt_pdev and reports success. The pdev argument is not used.
 */
static int qcom_cpufreq_kryo_remove(struct platform_device *pdev)
{
platform_device_unregister(cpufreq_dt_pdev);
return 0;
}
static struct platform_driver qcom_cpufreq_kryo_driver = { static struct platform_driver qcom_cpufreq_kryo_driver = {
.probe = qcom_cpufreq_kryo_probe, .probe = qcom_cpufreq_kryo_probe,
.remove = qcom_cpufreq_kryo_remove,
.driver = { .driver = {
.name = "qcom-cpufreq-kryo", .name = "qcom-cpufreq-kryo",
}, },
...@@ -198,8 +209,9 @@ static int __init qcom_cpufreq_kryo_init(void) ...@@ -198,8 +209,9 @@ static int __init qcom_cpufreq_kryo_init(void)
if (unlikely(ret < 0)) if (unlikely(ret < 0))
return ret; return ret;
ret = PTR_ERR_OR_ZERO(platform_device_register_simple( kryo_cpufreq_pdev = platform_device_register_simple(
"qcom-cpufreq-kryo", -1, NULL, 0)); "qcom-cpufreq-kryo", -1, NULL, 0);
ret = PTR_ERR_OR_ZERO(kryo_cpufreq_pdev);
if (0 == ret) if (0 == ret)
return 0; return 0;
...@@ -208,5 +220,12 @@ static int __init qcom_cpufreq_kryo_init(void) ...@@ -208,5 +220,12 @@ static int __init qcom_cpufreq_kryo_init(void)
} }
module_init(qcom_cpufreq_kryo_init); module_init(qcom_cpufreq_kryo_init);
/*
 * Module exit handler (registered via module_exit()).
 *
 * Unregisters the platform device that qcom_cpufreq_kryo_init() stored in
 * kryo_cpufreq_pdev, then unregisters qcom_cpufreq_kryo_driver.
 *
 * This must be annotated __exit, not __init: __init text is discarded once
 * initialization has finished, so an __init exit handler would be called
 * after its code has already been freed when the module is unloaded.
 */
static void __exit qcom_cpufreq_kryo_exit(void)
{
	platform_device_unregister(kryo_cpufreq_pdev);
	platform_driver_unregister(&qcom_cpufreq_kryo_driver);
}
module_exit(qcom_cpufreq_kryo_exit);
MODULE_DESCRIPTION("Qualcomm Technologies, Inc. Kryo CPUfreq driver"); MODULE_DESCRIPTION("Qualcomm Technologies, Inc. Kryo CPUfreq driver");
MODULE_LICENSE("GPL v2"); MODULE_LICENSE("GPL v2");
...@@ -598,7 +598,7 @@ static int _generic_set_opp_regulator(const struct opp_table *opp_table, ...@@ -598,7 +598,7 @@ static int _generic_set_opp_regulator(const struct opp_table *opp_table,
} }
/* Scaling up? Scale voltage before frequency */ /* Scaling up? Scale voltage before frequency */
if (freq > old_freq) { if (freq >= old_freq) {
ret = _set_opp_voltage(dev, reg, new_supply); ret = _set_opp_voltage(dev, reg, new_supply);
if (ret) if (ret)
goto restore_voltage; goto restore_voltage;
......
...@@ -56,7 +56,7 @@ name as necessary to disambiguate it from others is necessary. Note that option ...@@ -56,7 +56,7 @@ name as necessary to disambiguate it from others is necessary. Note that option
.PP .PP
\fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. \fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group.
.PP .PP
\fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default. Currently the only built-in counters disabled by default are "usec" and "Time_Of_Day_Seconds". \fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default. Currently the only built-in counters disabled by default are "usec", "Time_Of_Day_Seconds", "APIC" and "X2APIC".
The column name "all" can be used to enable all disabled-by-default built-in counters. The column name "all" can be used to enable all disabled-by-default built-in counters.
.PP .PP
\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. \fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group.
......
...@@ -109,6 +109,7 @@ unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ ...@@ -109,6 +109,7 @@ unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
unsigned int has_misc_feature_control; unsigned int has_misc_feature_control;
unsigned int first_counter_read = 1;
#define RAPL_PKG (1 << 0) #define RAPL_PKG (1 << 0)
/* 0x610 MSR_PKG_POWER_LIMIT */ /* 0x610 MSR_PKG_POWER_LIMIT */
...@@ -170,6 +171,8 @@ struct thread_data { ...@@ -170,6 +171,8 @@ struct thread_data {
unsigned long long irq_count; unsigned long long irq_count;
unsigned int smi_count; unsigned int smi_count;
unsigned int cpu_id; unsigned int cpu_id;
unsigned int apic_id;
unsigned int x2apic_id;
unsigned int flags; unsigned int flags;
#define CPU_IS_FIRST_THREAD_IN_CORE 0x2 #define CPU_IS_FIRST_THREAD_IN_CORE 0x2
#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4
...@@ -381,19 +384,23 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) ...@@ -381,19 +384,23 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
} }
/* /*
* Each string in this array is compared in --show and --hide cmdline. * This list matches the column headers, except
* Thus, strings that are proper sub-sets must follow their more specific peers. * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
* 2. Core and CPU are moved to the end, we can't have strings that contain them
* matching on them for --show and --hide.
*/ */
struct msr_counter bic[] = { struct msr_counter bic[] = {
{ 0x0, "usec" }, { 0x0, "usec" },
{ 0x0, "Time_Of_Day_Seconds" }, { 0x0, "Time_Of_Day_Seconds" },
{ 0x0, "Package" }, { 0x0, "Package" },
{ 0x0, "Node" },
{ 0x0, "Avg_MHz" }, { 0x0, "Avg_MHz" },
{ 0x0, "Busy%" },
{ 0x0, "Bzy_MHz" }, { 0x0, "Bzy_MHz" },
{ 0x0, "TSC_MHz" }, { 0x0, "TSC_MHz" },
{ 0x0, "IRQ" }, { 0x0, "IRQ" },
{ 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL}, { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
{ 0x0, "Busy%" }, { 0x0, "sysfs" },
{ 0x0, "CPU%c1" }, { 0x0, "CPU%c1" },
{ 0x0, "CPU%c3" }, { 0x0, "CPU%c3" },
{ 0x0, "CPU%c6" }, { 0x0, "CPU%c6" },
...@@ -424,73 +431,73 @@ struct msr_counter bic[] = { ...@@ -424,73 +431,73 @@ struct msr_counter bic[] = {
{ 0x0, "Cor_J" }, { 0x0, "Cor_J" },
{ 0x0, "GFX_J" }, { 0x0, "GFX_J" },
{ 0x0, "RAM_J" }, { 0x0, "RAM_J" },
{ 0x0, "Core" },
{ 0x0, "CPU" },
{ 0x0, "Mod%c6" }, { 0x0, "Mod%c6" },
{ 0x0, "sysfs" },
{ 0x0, "Totl%C0" }, { 0x0, "Totl%C0" },
{ 0x0, "Any%C0" }, { 0x0, "Any%C0" },
{ 0x0, "GFX%C0" }, { 0x0, "GFX%C0" },
{ 0x0, "CPUGFX%" }, { 0x0, "CPUGFX%" },
{ 0x0, "Node%" }, { 0x0, "Core" },
{ 0x0, "CPU" },
{ 0x0, "APIC" },
{ 0x0, "X2APIC" },
}; };
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
#define BIC_USEC (1ULL << 0) #define BIC_USEC (1ULL << 0)
#define BIC_TOD (1ULL << 1) #define BIC_TOD (1ULL << 1)
#define BIC_Package (1ULL << 2) #define BIC_Package (1ULL << 2)
#define BIC_Avg_MHz (1ULL << 3) #define BIC_Node (1ULL << 3)
#define BIC_Bzy_MHz (1ULL << 4) #define BIC_Avg_MHz (1ULL << 4)
#define BIC_TSC_MHz (1ULL << 5) #define BIC_Busy (1ULL << 5)
#define BIC_IRQ (1ULL << 6) #define BIC_Bzy_MHz (1ULL << 6)
#define BIC_SMI (1ULL << 7) #define BIC_TSC_MHz (1ULL << 7)
#define BIC_Busy (1ULL << 8) #define BIC_IRQ (1ULL << 8)
#define BIC_CPU_c1 (1ULL << 9) #define BIC_SMI (1ULL << 9)
#define BIC_CPU_c3 (1ULL << 10) #define BIC_sysfs (1ULL << 10)
#define BIC_CPU_c6 (1ULL << 11) #define BIC_CPU_c1 (1ULL << 11)
#define BIC_CPU_c7 (1ULL << 12) #define BIC_CPU_c3 (1ULL << 12)
#define BIC_ThreadC (1ULL << 13) #define BIC_CPU_c6 (1ULL << 13)
#define BIC_CoreTmp (1ULL << 14) #define BIC_CPU_c7 (1ULL << 14)
#define BIC_CoreCnt (1ULL << 15) #define BIC_ThreadC (1ULL << 15)
#define BIC_PkgTmp (1ULL << 16) #define BIC_CoreTmp (1ULL << 16)
#define BIC_GFX_rc6 (1ULL << 17) #define BIC_CoreCnt (1ULL << 17)
#define BIC_GFXMHz (1ULL << 18) #define BIC_PkgTmp (1ULL << 18)
#define BIC_Pkgpc2 (1ULL << 19) #define BIC_GFX_rc6 (1ULL << 19)
#define BIC_Pkgpc3 (1ULL << 20) #define BIC_GFXMHz (1ULL << 20)
#define BIC_Pkgpc6 (1ULL << 21) #define BIC_Pkgpc2 (1ULL << 21)
#define BIC_Pkgpc7 (1ULL << 22) #define BIC_Pkgpc3 (1ULL << 22)
#define BIC_Pkgpc8 (1ULL << 23) #define BIC_Pkgpc6 (1ULL << 23)
#define BIC_Pkgpc9 (1ULL << 24) #define BIC_Pkgpc7 (1ULL << 24)
#define BIC_Pkgpc10 (1ULL << 25) #define BIC_Pkgpc8 (1ULL << 25)
#define BIC_CPU_LPI (1ULL << 26) #define BIC_Pkgpc9 (1ULL << 26)
#define BIC_SYS_LPI (1ULL << 27) #define BIC_Pkgpc10 (1ULL << 27)
#define BIC_PkgWatt (1ULL << 26) #define BIC_CPU_LPI (1ULL << 28)
#define BIC_CorWatt (1ULL << 27) #define BIC_SYS_LPI (1ULL << 29)
#define BIC_GFXWatt (1ULL << 28) #define BIC_PkgWatt (1ULL << 30)
#define BIC_PkgCnt (1ULL << 29) #define BIC_CorWatt (1ULL << 31)
#define BIC_RAMWatt (1ULL << 30) #define BIC_GFXWatt (1ULL << 32)
#define BIC_PKG__ (1ULL << 31) #define BIC_PkgCnt (1ULL << 33)
#define BIC_RAM__ (1ULL << 32) #define BIC_RAMWatt (1ULL << 34)
#define BIC_Pkg_J (1ULL << 33) #define BIC_PKG__ (1ULL << 35)
#define BIC_Cor_J (1ULL << 34) #define BIC_RAM__ (1ULL << 36)
#define BIC_GFX_J (1ULL << 35) #define BIC_Pkg_J (1ULL << 37)
#define BIC_RAM_J (1ULL << 36) #define BIC_Cor_J (1ULL << 38)
#define BIC_Core (1ULL << 37) #define BIC_GFX_J (1ULL << 39)
#define BIC_CPU (1ULL << 38) #define BIC_RAM_J (1ULL << 40)
#define BIC_Mod_c6 (1ULL << 39) #define BIC_Mod_c6 (1ULL << 41)
#define BIC_sysfs (1ULL << 40) #define BIC_Totl_c0 (1ULL << 42)
#define BIC_Totl_c0 (1ULL << 41) #define BIC_Any_c0 (1ULL << 43)
#define BIC_Any_c0 (1ULL << 42) #define BIC_GFX_c0 (1ULL << 44)
#define BIC_GFX_c0 (1ULL << 43) #define BIC_CPUGFX (1ULL << 45)
#define BIC_CPUGFX (1ULL << 44) #define BIC_Core (1ULL << 46)
#define BIC_Node (1ULL << 45) #define BIC_CPU (1ULL << 47)
#define BIC_APIC (1ULL << 48)
#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD) #define BIC_X2APIC (1ULL << 49)
#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs; unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
...@@ -517,17 +524,34 @@ void help(void) ...@@ -517,17 +524,34 @@ void help(void)
"when COMMAND completes.\n" "when COMMAND completes.\n"
"If no COMMAND is specified, turbostat wakes every 5-seconds\n" "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
"to print statistics, until interrupted.\n" "to print statistics, until interrupted.\n"
"--add add a counter\n" " -a, --add add a counter\n"
" eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
"--cpu cpu-set limit output to summary plus cpu-set:\n" " -c, --cpu cpu-set limit output to summary plus cpu-set:\n"
" {core | package | j,k,l..m,n-p }\n" " {core | package | j,k,l..m,n-p }\n"
"--quiet skip decoding system configuration header\n" " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n"
"--interval sec.subsec Override default 5-second measurement interval\n" " -D, --Dump displays the raw counter values\n"
"--help print this help message\n" " -e, --enable [all | column]\n"
"--list list column headers only\n" " shows all or the specified disabled column\n"
"--num_iterations num number of the measurement iterations\n" " -H, --hide [column|column,column,...]\n"
"--out file create or truncate \"file\" for all output\n" " hide the specified column(s)\n"
"--version print version information\n" " -i, --interval sec.subsec\n"
" Override default 5-second measurement interval\n"
" -J, --Joules displays energy in Joules instead of Watts\n"
" -l, --list list column headers only\n"
" -n, --num_iterations num\n"
" number of the measurement iterations\n"
" -o, --out file\n"
" create or truncate \"file\" for all output\n"
" -q, --quiet skip decoding system configuration header\n"
" -s, --show [column|column,column,...]\n"
" show only the specified column(s)\n"
" -S, --Summary\n"
" limits output to 1-line system summary per interval\n"
" -T, --TCC temperature\n"
" sets the Thermal Control Circuit temperature in\n"
" degrees Celsius\n"
" -h, --help print this help message\n"
" -v, --version print version information\n"
"\n" "\n"
"For more help, run \"man turbostat\"\n"); "For more help, run \"man turbostat\"\n");
} }
...@@ -601,6 +625,10 @@ void print_header(char *delim) ...@@ -601,6 +625,10 @@ void print_header(char *delim)
outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
if (DO_BIC(BIC_CPU)) if (DO_BIC(BIC_CPU))
outp += sprintf(outp, "%sCPU", (printed++ ? delim : "")); outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
if (DO_BIC(BIC_APIC))
outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
if (DO_BIC(BIC_X2APIC))
outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
if (DO_BIC(BIC_Avg_MHz)) if (DO_BIC(BIC_Avg_MHz))
outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : "")); outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
if (DO_BIC(BIC_Busy)) if (DO_BIC(BIC_Busy))
...@@ -880,6 +908,10 @@ int format_counters(struct thread_data *t, struct core_data *c, ...@@ -880,6 +908,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
outp += sprintf(outp, "%s-", (printed++ ? delim : "")); outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
if (DO_BIC(BIC_CPU)) if (DO_BIC(BIC_CPU))
outp += sprintf(outp, "%s-", (printed++ ? delim : "")); outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
if (DO_BIC(BIC_APIC))
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
if (DO_BIC(BIC_X2APIC))
outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
} else { } else {
if (DO_BIC(BIC_Package)) { if (DO_BIC(BIC_Package)) {
if (p) if (p)
...@@ -904,6 +936,10 @@ int format_counters(struct thread_data *t, struct core_data *c, ...@@ -904,6 +936,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
} }
if (DO_BIC(BIC_CPU)) if (DO_BIC(BIC_CPU))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id); outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
if (DO_BIC(BIC_APIC))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
if (DO_BIC(BIC_X2APIC))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
} }
if (DO_BIC(BIC_Avg_MHz)) if (DO_BIC(BIC_Avg_MHz))
...@@ -1231,6 +1267,12 @@ delta_thread(struct thread_data *new, struct thread_data *old, ...@@ -1231,6 +1267,12 @@ delta_thread(struct thread_data *new, struct thread_data *old,
int i; int i;
struct msr_counter *mp; struct msr_counter *mp;
/* we run cpuid just the 1st time, copy the results */
if (DO_BIC(BIC_APIC))
new->apic_id = old->apic_id;
if (DO_BIC(BIC_X2APIC))
new->x2apic_id = old->x2apic_id;
/* /*
* the timestamps from start of measurement interval are in "old" * the timestamps from start of measurement interval are in "old"
* the timestamp from end of measurement interval are in "new" * the timestamp from end of measurement interval are in "new"
...@@ -1393,6 +1435,12 @@ int sum_counters(struct thread_data *t, struct core_data *c, ...@@ -1393,6 +1435,12 @@ int sum_counters(struct thread_data *t, struct core_data *c,
int i; int i;
struct msr_counter *mp; struct msr_counter *mp;
/* copy un-changing apic_id's */
if (DO_BIC(BIC_APIC))
average.threads.apic_id = t->apic_id;
if (DO_BIC(BIC_X2APIC))
average.threads.x2apic_id = t->x2apic_id;
/* remember first tv_begin */ /* remember first tv_begin */
if (average.threads.tv_begin.tv_sec == 0) if (average.threads.tv_begin.tv_sec == 0)
average.threads.tv_begin = t->tv_begin; average.threads.tv_begin = t->tv_begin;
...@@ -1619,6 +1667,34 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) ...@@ -1619,6 +1667,34 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
return 0; return 0;
} }
/*
 * get_apic_id(t)
 * Run CPUID on the calling CPU and record its APIC ID in t->apic_id and,
 * when the X2APIC column is enabled and leaf 0xb exists, its x2APIC ID in
 * t->x2apic_id.
 *
 * Does nothing unless genuine_intel is set. In that case t->apic_id is
 * always written; t->x2apic_id is written only if the maximum CPUID level
 * is at least 0xb and DO_BIC(BIC_X2APIC) is true.
 *
 * With debug set, a mismatch between the two IDs is reported on stderr.
 */
void get_apic_id(struct thread_data *t)
{
	unsigned int eax, ebx, ecx, edx, max_level;

	eax = ebx = ecx = edx = 0;

	if (!genuine_intel)
		return;

	/* CPUID.0:EAX is the highest supported basic leaf */
	__cpuid(0, max_level, ebx, ecx, edx);

	/* CPUID.1:EBX[31:24] is the 8-bit initial APIC ID -- keep all 8 bits */
	__cpuid(1, eax, ebx, ecx, edx);
	t->apic_id = (ebx >> 24) & 0xff;

	if (max_level < 0xb)
		return;

	if (!DO_BIC(BIC_X2APIC))
		return;

	/*
	 * Leaf 0xb takes a sub-leaf index in ECX. __cpuid() does not load
	 * ECX, so request sub-leaf 0 explicitly via __cpuid_count().
	 * EDX returns the x2APIC ID of the current logical processor.
	 */
	__cpuid_count(0xb, 0, eax, ebx, ecx, edx);
	t->x2apic_id = edx;

	if (debug && (t->apic_id != t->x2apic_id))
		fprintf(stderr, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
}
/* /*
* get_counters(...) * get_counters(...)
* migrate to cpu * migrate to cpu
...@@ -1632,7 +1708,6 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -1632,7 +1708,6 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
struct msr_counter *mp; struct msr_counter *mp;
int i; int i;
gettimeofday(&t->tv_begin, (struct timezone *)NULL); gettimeofday(&t->tv_begin, (struct timezone *)NULL);
if (cpu_migrate(cpu)) { if (cpu_migrate(cpu)) {
...@@ -1640,6 +1715,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) ...@@ -1640,6 +1715,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return -1; return -1;
} }
if (first_counter_read)
get_apic_id(t);
retry: retry:
t->tsc = rdtsc(); /* we are running on local CPU of interest */ t->tsc = rdtsc(); /* we are running on local CPU of interest */
...@@ -2432,6 +2509,12 @@ void set_node_data(void) ...@@ -2432,6 +2509,12 @@ void set_node_data(void)
if (pni[pkg].count > topo.nodes_per_pkg) if (pni[pkg].count > topo.nodes_per_pkg)
topo.nodes_per_pkg = pni[0].count; topo.nodes_per_pkg = pni[0].count;
/* Fake 1 node per pkg for machines that don't
* expose nodes and thus avoid -nan results
*/
if (topo.nodes_per_pkg == 0)
topo.nodes_per_pkg = 1;
for (cpu = 0; cpu < topo.num_cpus; cpu++) { for (cpu = 0; cpu < topo.num_cpus; cpu++) {
pkg = cpus[cpu].physical_package_id; pkg = cpus[cpu].physical_package_id;
node = cpus[cpu].physical_node_id; node = cpus[cpu].physical_node_id;
...@@ -2879,6 +2962,7 @@ void do_sleep(void) ...@@ -2879,6 +2962,7 @@ void do_sleep(void)
} }
} }
void turbostat_loop() void turbostat_loop()
{ {
int retval; int retval;
...@@ -2892,6 +2976,7 @@ void turbostat_loop() ...@@ -2892,6 +2976,7 @@ void turbostat_loop()
snapshot_proc_sysfs_files(); snapshot_proc_sysfs_files();
retval = for_all_cpus(get_counters, EVEN_COUNTERS); retval = for_all_cpus(get_counters, EVEN_COUNTERS);
first_counter_read = 0;
if (retval < -1) { if (retval < -1) {
exit(retval); exit(retval);
} else if (retval == -1) { } else if (retval == -1) {
...@@ -4392,7 +4477,7 @@ void process_cpuid() ...@@ -4392,7 +4477,7 @@ void process_cpuid()
if (!quiet) { if (!quiet) {
fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
max_level, family, model, stepping, family, model, stepping); max_level, family, model, stepping, family, model, stepping);
fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n", fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
ecx & (1 << 0) ? "SSE3" : "-", ecx & (1 << 0) ? "SSE3" : "-",
ecx & (1 << 3) ? "MONITOR" : "-", ecx & (1 << 3) ? "MONITOR" : "-",
ecx & (1 << 6) ? "SMX" : "-", ecx & (1 << 6) ? "SMX" : "-",
...@@ -4401,6 +4486,7 @@ void process_cpuid() ...@@ -4401,6 +4486,7 @@ void process_cpuid()
edx & (1 << 4) ? "TSC" : "-", edx & (1 << 4) ? "TSC" : "-",
edx & (1 << 5) ? "MSR" : "-", edx & (1 << 5) ? "MSR" : "-",
edx & (1 << 22) ? "ACPI-TM" : "-", edx & (1 << 22) ? "ACPI-TM" : "-",
edx & (1 << 28) ? "HT" : "-",
edx & (1 << 29) ? "TM" : "-"); edx & (1 << 29) ? "TM" : "-");
} }
...@@ -4652,7 +4738,6 @@ void process_cpuid() ...@@ -4652,7 +4738,6 @@ void process_cpuid()
return; return;
} }
/* /*
* in /dev/cpu/ return success for names that are numbers * in /dev/cpu/ return success for names that are numbers
* ie. filter out ".", "..", "microcode". * ie. filter out ".", "..", "microcode".
...@@ -4842,6 +4927,13 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base, ...@@ -4842,6 +4927,13 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base,
struct core_data *c; struct core_data *c;
struct pkg_data *p; struct pkg_data *p;
/* Workaround for systems where physical_node_id==-1
* and logical_node_id==(-1 - topo.num_cpus)
*/
if (node_id < 0)
node_id = 0;
t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
c = GET_CORE(core_base, core_id, node_id, pkg_id); c = GET_CORE(core_base, core_id, node_id, pkg_id);
p = GET_PKG(pkg_base, pkg_id); p = GET_PKG(pkg_base, pkg_id);
...@@ -4946,6 +5038,7 @@ int fork_it(char **argv) ...@@ -4946,6 +5038,7 @@ int fork_it(char **argv)
snapshot_proc_sysfs_files(); snapshot_proc_sysfs_files();
status = for_all_cpus(get_counters, EVEN_COUNTERS); status = for_all_cpus(get_counters, EVEN_COUNTERS);
first_counter_read = 0;
if (status) if (status)
exit(status); exit(status);
/* clear affinity side-effect of get_counters() */ /* clear affinity side-effect of get_counters() */
...@@ -5009,7 +5102,7 @@ int get_and_dump_counters(void) ...@@ -5009,7 +5102,7 @@ int get_and_dump_counters(void)
} }
void print_version() { void print_version() {
fprintf(outf, "turbostat version 18.06.01" fprintf(outf, "turbostat version 18.06.20"
" - Len Brown <lenb@kernel.org>\n"); " - Len Brown <lenb@kernel.org>\n");
} }
...@@ -5381,7 +5474,7 @@ void cmdline(int argc, char **argv) ...@@ -5381,7 +5474,7 @@ void cmdline(int argc, char **argv)
break; break;
case 'e': case 'e':
/* --enable specified counter */ /* --enable specified counter */
bic_enabled |= bic_lookup(optarg, SHOW_LIST); bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
break; break;
case 'd': case 'd':
debug++; debug++;
...@@ -5465,7 +5558,6 @@ void cmdline(int argc, char **argv) ...@@ -5465,7 +5558,6 @@ void cmdline(int argc, char **argv)
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
outf = stderr; outf = stderr;
cmdline(argc, argv); cmdline(argc, argv);
if (!quiet) if (!quiet)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment