Commit 2cdd5cc7 authored by Rafael J. Wysocki

Merge branch 'pm-cpuidle'

* pm-cpuidle:
  cpuidle-haltpoll: Enable kvm guest polling when dedicated physical CPUs are available
  cpuidle-haltpoll: do not set an owner to allow modunload
  cpuidle-haltpoll: return -ENODEV on modinit failure
  cpuidle-haltpoll: set haltpoll as preferred governor
  cpuidle: allow governor switch on cpuidle_register_driver()
  powercap: idle_inject: Use higher resolution for idle injection
  cpuidle: play_idle: Increase the resolution to usec
  cpuidle-haltpoll: vcpu hotplug support
  cpuidle: teo: Get rid of redundant check in teo_update()
  cpuidle: teo: Allow tick to be stopped if PM QoS is used
  cpuidle: menu: Allow tick to be stopped if PM QoS is used
  cpuidle: header file stubs must be "static inline"
  cpuidle-haltpoll: disable host side polling when kvm virtualized
  cpuidle: add haltpoll governor
  governors: unify last_state_idx
  cpuidle: add poll_limit_ns to cpuidle_device structure
  add cpuidle-haltpoll driver
parents d2817063 1328edca
Guest halt polling
==================

The cpuidle_haltpoll driver, with the haltpoll governor, allows
the guest vcpus to poll for a specified amount of time before
halting.
This provides the following benefits compared to host side polling:

	1) The POLL flag is set while polling is performed, which allows
	   a remote vCPU to avoid sending an IPI (and the associated
	   cost of handling the IPI) when performing a wakeup.

	2) The VM-exit cost can be avoided.

The downside of guest side polling is that polling is performed
even with other runnable tasks in the host.
The basic logic is as follows: a global value, guest_halt_poll_ns,
is configured by the user, indicating the maximum amount of
time polling is allowed. This value is fixed.

Each vcpu has an adjustable guest_halt_poll_ns
("per-cpu guest_halt_poll_ns"), which is adjusted by the algorithm
in response to events (explained below).
Module Parameters
=================

The haltpoll governor has 5 tunable module parameters:
1) guest_halt_poll_ns:

Maximum amount of time, in nanoseconds, that polling is
performed before halting.

Default: 200000
2) guest_halt_poll_shrink:

Division factor used to shrink the per-cpu guest_halt_poll_ns when
a wakeup event occurs after the global guest_halt_poll_ns.

Default: 2
3) guest_halt_poll_grow:

Multiplication factor used to grow the per-cpu guest_halt_poll_ns
when an event occurs after the per-cpu guest_halt_poll_ns
but before the global guest_halt_poll_ns.

Default: 2
4) guest_halt_poll_grow_start:

The per-cpu guest_halt_poll_ns eventually reaches zero
in case of an idle system. This value sets the initial
per-cpu guest_halt_poll_ns when growing. This can
be increased from 10000, to avoid misses during the initial
growth stage: 10k, 20k, 40k, ... (example assumes
guest_halt_poll_grow=2).

Default: 50000
5) guest_halt_poll_allow_shrink:

Bool parameter which allows shrinking. Set to N
to avoid it (the per-cpu guest_halt_poll_ns will remain
high once it reaches the global guest_halt_poll_ns value).

Default: Y
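
Taken together, these parameters drive a simple grow/shrink loop around each
vcpu's poll window. The sketch below is only an illustrative summary of the
adjustment performed by the haltpoll governor's adjust_poll_limit() (included
later in this series); the helper name and the poll_limit_ns pointer are made
up for the example:

	/* Illustrative sketch; see adjust_poll_limit() in the haltpoll governor. */
	static void adjust_window(u64 *poll_limit_ns, u64 block_ns)
	{
		if (block_ns > *poll_limit_ns && block_ns <= guest_halt_poll_ns) {
			/* Woke up past the window but within the global limit: grow. */
			u64 val = *poll_limit_ns * guest_halt_poll_grow;

			if (val < guest_halt_poll_grow_start)
				val = guest_halt_poll_grow_start;
			if (val > guest_halt_poll_ns)
				val = guest_halt_poll_ns;
			*poll_limit_ns = val;
		} else if (block_ns > guest_halt_poll_ns &&
			   guest_halt_poll_allow_shrink) {
			/* Woke up past the global limit: shrink (factor 0 resets to 0). */
			*poll_limit_ns = guest_halt_poll_shrink ?
					 *poll_limit_ns / guest_halt_poll_shrink : 0;
		}
	}
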
The module parameters can be set from the sysfs files in:

	/sys/module/haltpoll/parameters/
Further Notes
=============

- Care should be taken when setting the guest_halt_poll_ns parameter as a
  large value has the potential to drive the CPU usage to 100% on a machine
  which would be almost entirely idle otherwise.
@@ -794,6 +794,7 @@ config KVM_GUEST
 	bool "KVM Guest support (including kvmclock)"
 	depends on PARAVIRT
 	select PARAVIRT_CLOCK
+	select ARCH_CPUIDLE_HALTPOLL
 	default y
 	---help---
 	  This option enables various optimizations for running under the KVM
@@ -802,6 +803,12 @@ config KVM_GUEST
 	  underlying device model, the host provides the guest with
 	  timing infrastructure such as time of day, and system time
 
+config ARCH_CPUIDLE_HALTPOLL
+	def_bool n
+	prompt "Disable host haltpoll when loading haltpoll driver"
+	help
+	  If virtualized under KVM, disable host haltpoll.
+
 config PVH
 	bool "Support for running PVH guests"
 	---help---
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ARCH_HALTPOLL_H
#define _ARCH_HALTPOLL_H
void arch_haltpoll_enable(unsigned int cpu);
void arch_haltpoll_disable(unsigned int cpu);
#endif
@@ -705,6 +705,7 @@ unsigned int kvm_arch_para_hints(void)
 {
 	return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES);
 }
+EXPORT_SYMBOL_GPL(kvm_arch_para_hints);
 
 static uint32_t __init kvm_detect(void)
 {
@@ -867,3 +868,39 @@ void __init kvm_spinlock_init(void)
 }
 #endif	/* CONFIG_PARAVIRT_SPINLOCKS */
#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
static void kvm_disable_host_haltpoll(void *i)
{
wrmsrl(MSR_KVM_POLL_CONTROL, 0);
}
static void kvm_enable_host_haltpoll(void *i)
{
wrmsrl(MSR_KVM_POLL_CONTROL, 1);
}
void arch_haltpoll_enable(unsigned int cpu)
{
if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
pr_err_once("kvm: host does not support poll control\n");
pr_err_once("kvm: host upgrade recommended\n");
return;
}
/* Enabling guest halt poll disables host halt poll */
smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1);
}
EXPORT_SYMBOL_GPL(arch_haltpoll_enable);
void arch_haltpoll_disable(unsigned int cpu)
{
if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
return;
/* Disabling guest halt poll re-enables host halt poll */
smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1);
}
EXPORT_SYMBOL_GPL(arch_haltpoll_disable);
#endif
@@ -580,7 +580,7 @@ void __cpuidle default_idle(void)
 	safe_halt();
 	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
 }
-#ifdef CONFIG_APM_MODULE
+#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
 EXPORT_SYMBOL(default_idle);
 #endif
......
@@ -33,6 +33,17 @@ config CPU_IDLE_GOV_TEO
 	  Some workloads benefit from using it and it generally should be safe
 	  to use. Say Y here if you are not happy with the alternatives.
 
+config CPU_IDLE_GOV_HALTPOLL
+	bool "Haltpoll governor (for virtualized systems)"
+	depends on KVM_GUEST
+	help
+	  This governor implements haltpoll idle state selection, to be
+	  used in conjunction with the haltpoll cpuidle driver, allowing
+	  polling for a certain amount of time before entering the idle
+	  state.
+
+	  Some virtualized workloads benefit from using it.
+
 config DT_IDLE_STATES
 	bool
 
@@ -51,6 +62,15 @@ depends on PPC
 source "drivers/cpuidle/Kconfig.powerpc"
 endmenu
 
+config HALTPOLL_CPUIDLE
+	tristate "Halt poll cpuidle driver"
+	depends on X86 && KVM_GUEST
+	default y
+	help
+	  This option enables the halt poll cpuidle driver, which allows
+	  polling before halting in the guest (more efficient than polling
+	  in the host via halt_poll_ns for some scenarios).
+
 endif
 
 config ARCH_NEEDS_CPU_IDLE_COUPLED
......
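
For orientation, a guest kernel configuration that enables everything added by
this series would contain a fragment along these lines (illustrative only; on
x86, KVM_GUEST already selects ARCH_CPUIDLE_HALTPOLL):

	CONFIG_KVM_GUEST=y
	CONFIG_ARCH_CPUIDLE_HALTPOLL=y
	CONFIG_HALTPOLL_CPUIDLE=y
	CONFIG_CPU_IDLE_GOV_HALTPOLL=y
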
@@ -7,6 +7,7 @@ obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
 obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
 obj-$(CONFIG_DT_IDLE_STATES)		  += dt_idle_states.o
 obj-$(CONFIG_ARCH_HAS_CPU_RELAX)	  += poll_state.o
+obj-$(CONFIG_HALTPOLL_CPUIDLE)		  += cpuidle-haltpoll.o
 
 ##################################################################################
 # ARM SoC drivers
......
// SPDX-License-Identifier: GPL-2.0
/*
* cpuidle driver for haltpoll governor.
*
* Copyright 2019 Red Hat, Inc. and/or its affiliates.
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
* Authors: Marcelo Tosatti <mtosatti@redhat.com>
*/
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/module.h>
#include <linux/sched/idle.h>
#include <linux/kvm_para.h>
#include <linux/cpuidle_haltpoll.h>
static struct cpuidle_device __percpu *haltpoll_cpuidle_devices;
static enum cpuhp_state haltpoll_hp_state;
static int default_enter_idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
if (current_clr_polling_and_test()) {
local_irq_enable();
return index;
}
default_idle();
return index;
}
static struct cpuidle_driver haltpoll_driver = {
.name = "haltpoll",
.governor = "haltpoll",
.states = {
{ /* entry 0 is for polling */ },
{
.enter = default_enter_idle,
.exit_latency = 1,
.target_residency = 1,
.power_usage = -1,
.name = "haltpoll idle",
.desc = "default architecture idle",
},
},
.safe_state_index = 0,
.state_count = 2,
};
static int haltpoll_cpu_online(unsigned int cpu)
{
struct cpuidle_device *dev;
dev = per_cpu_ptr(haltpoll_cpuidle_devices, cpu);
if (!dev->registered) {
dev->cpu = cpu;
if (cpuidle_register_device(dev)) {
pr_notice("cpuidle_register_device %d failed!\n", cpu);
return -EIO;
}
arch_haltpoll_enable(cpu);
}
return 0;
}
static int haltpoll_cpu_offline(unsigned int cpu)
{
struct cpuidle_device *dev;
dev = per_cpu_ptr(haltpoll_cpuidle_devices, cpu);
if (dev->registered) {
arch_haltpoll_disable(cpu);
cpuidle_unregister_device(dev);
}
return 0;
}
static void haltpoll_uninit(void)
{
if (haltpoll_hp_state)
cpuhp_remove_state(haltpoll_hp_state);
cpuidle_unregister_driver(&haltpoll_driver);
free_percpu(haltpoll_cpuidle_devices);
haltpoll_cpuidle_devices = NULL;
}
static int __init haltpoll_init(void)
{
int ret;
struct cpuidle_driver *drv = &haltpoll_driver;
cpuidle_poll_state_init(drv);
if (!kvm_para_available() ||
!kvm_para_has_hint(KVM_HINTS_REALTIME))
return -ENODEV;
ret = cpuidle_register_driver(drv);
if (ret < 0)
return ret;
haltpoll_cpuidle_devices = alloc_percpu(struct cpuidle_device);
if (haltpoll_cpuidle_devices == NULL) {
cpuidle_unregister_driver(drv);
return -ENOMEM;
}
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "cpuidle/haltpoll:online",
haltpoll_cpu_online, haltpoll_cpu_offline);
if (ret < 0) {
haltpoll_uninit();
} else {
haltpoll_hp_state = ret;
ret = 0;
}
return ret;
}
static void __exit haltpoll_exit(void)
{
haltpoll_uninit();
}
module_init(haltpoll_init);
module_exit(haltpoll_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Marcelo Tosatti <mtosatti@redhat.com>");
@@ -361,6 +361,36 @@ void cpuidle_reflect(struct cpuidle_device *dev, int index)
 		cpuidle_curr_governor->reflect(dev, index);
 }
/**
* cpuidle_poll_time - return amount of time to poll for,
* governors can override dev->poll_limit_ns if necessary
*
* @drv: the cpuidle driver tied with the cpu
* @dev: the cpuidle device
*
*/
u64 cpuidle_poll_time(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{
int i;
u64 limit_ns;
if (dev->poll_limit_ns)
return dev->poll_limit_ns;
limit_ns = TICK_NSEC;
for (i = 1; i < drv->state_count; i++) {
if (drv->states[i].disabled || dev->states_usage[i].disable)
continue;
limit_ns = (u64)drv->states[i].target_residency * NSEC_PER_USEC;
}
dev->poll_limit_ns = limit_ns;
return dev->poll_limit_ns;
}
/**
 * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
 */
......
@@ -9,6 +9,7 @@
 /* For internal use only */
 extern char param_governor[];
 extern struct cpuidle_governor *cpuidle_curr_governor;
+extern struct cpuidle_governor *cpuidle_prev_governor;
 extern struct list_head cpuidle_governors;
 extern struct list_head cpuidle_detected_devices;
 extern struct mutex cpuidle_lock;
@@ -22,6 +23,7 @@ extern void cpuidle_install_idle_handler(void);
 extern void cpuidle_uninstall_idle_handler(void);
 
 /* governors */
+extern struct cpuidle_governor *cpuidle_find_governor(const char *str);
 extern int cpuidle_switch_governor(struct cpuidle_governor *gov);
 
 /* sysfs */
......
@@ -254,12 +254,25 @@ static void __cpuidle_unregister_driver(struct cpuidle_driver *drv)
  */
 int cpuidle_register_driver(struct cpuidle_driver *drv)
 {
+	struct cpuidle_governor *gov;
 	int ret;
 
 	spin_lock(&cpuidle_driver_lock);
 	ret = __cpuidle_register_driver(drv);
 	spin_unlock(&cpuidle_driver_lock);
 
+	if (!ret && !strlen(param_governor) && drv->governor &&
+	    (cpuidle_get_driver() == drv)) {
+		mutex_lock(&cpuidle_lock);
+		gov = cpuidle_find_governor(drv->governor);
+		if (gov) {
+			cpuidle_prev_governor = cpuidle_curr_governor;
+			if (cpuidle_switch_governor(gov) < 0)
+				cpuidle_prev_governor = NULL;
+		}
+		mutex_unlock(&cpuidle_lock);
+	}
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(cpuidle_register_driver);
@@ -274,9 +287,21 @@ EXPORT_SYMBOL_GPL(cpuidle_register_driver);
  */
 void cpuidle_unregister_driver(struct cpuidle_driver *drv)
 {
+	bool enabled = (cpuidle_get_driver() == drv);
+
 	spin_lock(&cpuidle_driver_lock);
 	__cpuidle_unregister_driver(drv);
 	spin_unlock(&cpuidle_driver_lock);
+
+	if (!enabled)
+		return;
+
+	mutex_lock(&cpuidle_lock);
+	if (cpuidle_prev_governor) {
+		if (!cpuidle_switch_governor(cpuidle_prev_governor))
+			cpuidle_prev_governor = NULL;
+	}
+	mutex_unlock(&cpuidle_lock);
 }
 EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
......
@@ -20,14 +20,15 @@ char param_governor[CPUIDLE_NAME_LEN];
 LIST_HEAD(cpuidle_governors);
 struct cpuidle_governor *cpuidle_curr_governor;
+struct cpuidle_governor *cpuidle_prev_governor;
 
 /**
- * __cpuidle_find_governor - finds a governor of the specified name
+ * cpuidle_find_governor - finds a governor of the specified name
  * @str: the name
  *
  * Must be called with cpuidle_lock acquired.
  */
-static struct cpuidle_governor * __cpuidle_find_governor(const char *str)
+struct cpuidle_governor *cpuidle_find_governor(const char *str)
 {
 	struct cpuidle_governor *gov;
 
@@ -87,7 +88,7 @@ int cpuidle_register_governor(struct cpuidle_governor *gov)
 		return -ENODEV;
 
 	mutex_lock(&cpuidle_lock);
-	if (__cpuidle_find_governor(gov->name) == NULL) {
+	if (cpuidle_find_governor(gov->name) == NULL) {
 		ret = 0;
 		list_add_tail(&gov->governor_list, &cpuidle_governors);
 		if (!cpuidle_curr_governor ||
......
@@ -6,3 +6,4 @@
 obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o
 obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o
 obj-$(CONFIG_CPU_IDLE_GOV_TEO) += teo.o
+obj-$(CONFIG_CPU_IDLE_GOV_HALTPOLL) += haltpoll.o
// SPDX-License-Identifier: GPL-2.0
/*
* haltpoll.c - haltpoll idle governor
*
* Copyright 2019 Red Hat, Inc. and/or its affiliates.
*
* This work is licensed under the terms of the GNU GPL, version 2. See
* the COPYING file in the top-level directory.
*
* Authors: Marcelo Tosatti <mtosatti@redhat.com>
*/
#include <linux/kernel.h>
#include <linux/cpuidle.h>
#include <linux/time.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/kvm_para.h>
static unsigned int guest_halt_poll_ns __read_mostly = 200000;
module_param(guest_halt_poll_ns, uint, 0644);
/* division factor to shrink halt_poll_ns */
static unsigned int guest_halt_poll_shrink __read_mostly = 2;
module_param(guest_halt_poll_shrink, uint, 0644);
/* multiplication factor to grow per-cpu poll_limit_ns */
static unsigned int guest_halt_poll_grow __read_mostly = 2;
module_param(guest_halt_poll_grow, uint, 0644);
/* value in ns to start growing per-cpu halt_poll_ns */
static unsigned int guest_halt_poll_grow_start __read_mostly = 50000;
module_param(guest_halt_poll_grow_start, uint, 0644);
/* allow shrinking guest halt poll */
static bool guest_halt_poll_allow_shrink __read_mostly = true;
module_param(guest_halt_poll_allow_shrink, bool, 0644);
/**
* haltpoll_select - selects the next idle state to enter
* @drv: cpuidle driver containing state data
* @dev: the CPU
* @stop_tick: indication on whether or not to stop the tick
*/
static int haltpoll_select(struct cpuidle_driver *drv,
struct cpuidle_device *dev,
bool *stop_tick)
{
int latency_req = cpuidle_governor_latency_req(dev->cpu);
if (!drv->state_count || latency_req == 0) {
*stop_tick = false;
return 0;
}
if (dev->poll_limit_ns == 0)
return 1;
/* Last state was poll? */
if (dev->last_state_idx == 0) {
/* Halt if no event occurred in the poll window */
if (dev->poll_time_limit == true)
return 1;
*stop_tick = false;
/* Otherwise, poll again */
return 0;
}
*stop_tick = false;
/* Last state was halt: poll */
return 0;
}
static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us)
{
unsigned int val;
u64 block_ns = block_us*NSEC_PER_USEC;
/* Grow poll_limit_ns if
* poll_limit_ns < block_ns <= guest_halt_poll_ns
*/
if (block_ns > dev->poll_limit_ns && block_ns <= guest_halt_poll_ns) {
val = dev->poll_limit_ns * guest_halt_poll_grow;
if (val < guest_halt_poll_grow_start)
val = guest_halt_poll_grow_start;
if (val > guest_halt_poll_ns)
val = guest_halt_poll_ns;
dev->poll_limit_ns = val;
} else if (block_ns > guest_halt_poll_ns &&
guest_halt_poll_allow_shrink) {
unsigned int shrink = guest_halt_poll_shrink;
val = dev->poll_limit_ns;
if (shrink == 0)
val = 0;
else
val /= shrink;
dev->poll_limit_ns = val;
}
}
/**
* haltpoll_reflect - update variables and update poll time
* @dev: the CPU
* @index: the index of actual entered state
*/
static void haltpoll_reflect(struct cpuidle_device *dev, int index)
{
dev->last_state_idx = index;
if (index != 0)
adjust_poll_limit(dev, dev->last_residency);
}
/**
* haltpoll_enable_device - scans a CPU's states and does setup
* @drv: cpuidle driver
* @dev: the CPU
*/
static int haltpoll_enable_device(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{
dev->poll_limit_ns = 0;
return 0;
}
static struct cpuidle_governor haltpoll_governor = {
.name = "haltpoll",
.rating = 9,
.enable = haltpoll_enable_device,
.select = haltpoll_select,
.reflect = haltpoll_reflect,
};
static int __init init_haltpoll(void)
{
if (kvm_para_available())
return cpuidle_register_governor(&haltpoll_governor);
return 0;
}
postcore_initcall(init_haltpoll);
@@ -38,7 +38,6 @@ struct ladder_device_state {
 
 struct ladder_device {
 	struct ladder_device_state states[CPUIDLE_STATE_MAX];
-	int last_state_idx;
 };
 
 static DEFINE_PER_CPU(struct ladder_device, ladder_devices);
@@ -49,12 +48,13 @@ static DEFINE_PER_CPU(struct ladder_device, ladder_devices);
  * @old_idx: the current state index
  * @new_idx: the new target state index
  */
-static inline void ladder_do_selection(struct ladder_device *ldev,
+static inline void ladder_do_selection(struct cpuidle_device *dev,
+				       struct ladder_device *ldev,
 				       int old_idx, int new_idx)
 {
 	ldev->states[old_idx].stats.promotion_count = 0;
 	ldev->states[old_idx].stats.demotion_count = 0;
-	ldev->last_state_idx = new_idx;
+	dev->last_state_idx = new_idx;
 }
 
 /**
@@ -68,13 +68,13 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 {
 	struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
 	struct ladder_device_state *last_state;
-	int last_residency, last_idx = ldev->last_state_idx;
+	int last_residency, last_idx = dev->last_state_idx;
 	int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0;
 	int latency_req = cpuidle_governor_latency_req(dev->cpu);
 
 	/* Special case when user has set very strict latency requirement */
 	if (unlikely(latency_req == 0)) {
-		ladder_do_selection(ldev, last_idx, 0);
+		ladder_do_selection(dev, ldev, last_idx, 0);
 		return 0;
 	}
@@ -91,7 +91,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 		last_state->stats.promotion_count++;
 		last_state->stats.demotion_count = 0;
 		if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
-			ladder_do_selection(ldev, last_idx, last_idx + 1);
+			ladder_do_selection(dev, ldev, last_idx, last_idx + 1);
 			return last_idx + 1;
 		}
 	}
@@ -107,7 +107,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 			if (drv->states[i].exit_latency <= latency_req)
 				break;
 		}
-		ladder_do_selection(ldev, last_idx, i);
+		ladder_do_selection(dev, ldev, last_idx, i);
 		return i;
 	}
@@ -116,7 +116,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 		last_state->stats.demotion_count++;
 		last_state->stats.promotion_count = 0;
 		if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
-			ladder_do_selection(ldev, last_idx, last_idx - 1);
+			ladder_do_selection(dev, ldev, last_idx, last_idx - 1);
 			return last_idx - 1;
 		}
 	}
@@ -139,7 +139,7 @@ static int ladder_enable_device(struct cpuidle_driver *drv,
 	struct ladder_device_state *lstate;
 	struct cpuidle_state *state;
 
-	ldev->last_state_idx = first_idx;
+	dev->last_state_idx = first_idx;
 
 	for (i = first_idx; i < drv->state_count; i++) {
 		state = &drv->states[i];
@@ -167,9 +167,8 @@ static int ladder_enable_device(struct cpuidle_driver *drv,
  */
 static void ladder_reflect(struct cpuidle_device *dev, int index)
 {
-	struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
-
 	if (index > 0)
-		ldev->last_state_idx = index;
+		dev->last_state_idx = index;
 }
 
 static struct cpuidle_governor ladder_governor = {
......
@@ -117,7 +117,6 @@
  */
 struct menu_device {
-	int		last_state_idx;
 	int		needs_update;
 	int		tick_wakeup;
@@ -302,9 +301,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	    !drv->states[0].disabled && !dev->states_usage[0].disable)) {
 		/*
 		 * In this case state[0] will be used no matter what, so return
-		 * it right away and keep the tick running.
+		 * it right away and keep the tick running if state[0] is a
+		 * polling one.
 		 */
-		*stop_tick = false;
+		*stop_tick = !(drv->states[0].flags & CPUIDLE_FLAG_POLLING);
 		return 0;
 	}
@@ -395,16 +395,9 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 			return idx;
 		}
 
-		if (s->exit_latency > latency_req) {
-			/*
-			 * If we break out of the loop for latency reasons, use
-			 * the target residency of the selected state as the
-			 * expected idle duration so that the tick is retained
-			 * as long as that target residency is low enough.
-			 */
-			predicted_us = drv->states[idx].target_residency;
+		if (s->exit_latency > latency_req)
 			break;
-		}
+
 		idx = i;
 	}
@@ -455,7 +448,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index)
 {
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
 
-	data->last_state_idx = index;
+	dev->last_state_idx = index;
 	data->needs_update = 1;
 	data->tick_wakeup = tick_nohz_idle_got_tick();
 }
@@ -468,7 +461,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index)
 static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
-	int last_idx = data->last_state_idx;
+	int last_idx = dev->last_state_idx;
 	struct cpuidle_state *target = &drv->states[last_idx];
 	unsigned int measured_us;
 	unsigned int new_factor;
......
@@ -96,7 +96,6 @@ struct teo_idle_state {
  * @time_span_ns: Time between idle state selection and post-wakeup update.
  * @sleep_length_ns: Time till the closest timer event (at the selection time).
  * @states: Idle states data corresponding to this CPU.
- * @last_state: Idle state entered by the CPU last time.
  * @interval_idx: Index of the most recent saved idle interval.
  * @intervals: Saved idle duration values.
  */
@@ -104,7 +103,6 @@ struct teo_cpu {
 	u64 time_span_ns;
 	u64 sleep_length_ns;
 	struct teo_idle_state states[CPUIDLE_STATE_MAX];
-	int last_state;
 	int interval_idx;
 	unsigned int intervals[INTERVALS];
 };
@@ -125,12 +123,15 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
 		/*
-		 * One of the safety nets has triggered or this was a timer
-		 * wakeup (or equivalent).
+		 * One of the safety nets has triggered or the wakeup was close
+		 * enough to the closest timer event expected at the idle state
+		 * selection time to be discarded.
 		 */
-		measured_us = sleep_length_us;
+		measured_us = UINT_MAX;
 	} else {
-		unsigned int lat = drv->states[cpu_data->last_state].exit_latency;
+		unsigned int lat;
+
+		lat = drv->states[dev->last_state_idx].exit_latency;
 
 		measured_us = ktime_to_us(cpu_data->time_span_ns);
 		/*
@@ -188,15 +189,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		cpu_data->states[idx_timer].hits = hits;
 	}
 
-	/*
-	 * If the total time span between idle state selection and the "reflect"
-	 * callback is greater than or equal to the sleep length determined at
-	 * the idle state selection time, the wakeup is likely to be due to a
-	 * timer event.
-	 */
-	if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns)
-		measured_us = UINT_MAX;
-
 	/*
 	 * Save idle duration values corresponding to non-timer wakeups for
 	 * pattern detection.
@@ -242,12 +234,12 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 	int latency_req = cpuidle_governor_latency_req(dev->cpu);
 	unsigned int duration_us, count;
-	int max_early_idx, idx, i;
+	int max_early_idx, constraint_idx, idx, i;
 	ktime_t delta_tick;
 
-	if (cpu_data->last_state >= 0) {
+	if (dev->last_state_idx >= 0) {
 		teo_update(drv, dev);
-		cpu_data->last_state = -1;
+		dev->last_state_idx = -1;
 	}
 
 	cpu_data->time_span_ns = local_clock();
@@ -257,6 +249,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	count = 0;
 	max_early_idx = -1;
+	constraint_idx = drv->state_count;
 	idx = -1;
 
 	for (i = 0; i < drv->state_count; i++) {
@@ -286,16 +279,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		if (s->target_residency > duration_us)
 			break;
 
-		if (s->exit_latency > latency_req) {
-			/*
-			 * If we break out of the loop for latency reasons, use
-			 * the target residency of the selected state as the
-			 * expected idle duration to avoid stopping the tick
-			 * as long as that target residency is low enough.
-			 */
-			duration_us = drv->states[idx].target_residency;
-			goto refine;
-		}
+		if (s->exit_latency > latency_req && constraint_idx > i)
+			constraint_idx = i;
 
 		idx = i;
 	}
@@ -321,7 +306,13 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		duration_us = drv->states[idx].target_residency;
 	}
 
-refine:
+	/*
+	 * If there is a latency constraint, it may be necessary to use a
+	 * shallower idle state than the one selected so far.
+	 */
+	if (constraint_idx < idx)
+		idx = constraint_idx;
+
 	if (idx < 0) {
 		idx = 0; /* No states enabled. Must use 0. */
 	} else if (idx > 0) {
@@ -331,13 +322,12 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		/*
 		 * Count and sum the most recent idle duration values less than
-		 * the target residency of the state selected so far, find the
-		 * max.
+		 * the current expected idle duration value.
 		 */
 		for (i = 0; i < INTERVALS; i++) {
 			unsigned int val = cpu_data->intervals[i];
 
-			if (val >= drv->states[idx].target_residency)
+			if (val >= duration_us)
 				continue;
 
 			count++;
@@ -356,8 +346,10 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		 * would be too shallow.
 		 */
 		if (!(tick_nohz_tick_stopped() && avg_us < TICK_USEC)) {
-			idx = teo_find_shallower_state(drv, dev, idx, avg_us);
 			duration_us = avg_us;
+			if (drv->states[idx].target_residency > avg_us)
+				idx = teo_find_shallower_state(drv, dev,
+							       idx, avg_us);
 		}
 	}
 }
@@ -394,7 +386,7 @@ static void teo_reflect(struct cpuidle_device *dev, int state)
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 
-	cpu_data->last_state = state;
+	dev->last_state_idx = state;
 
 	/*
 	 * If the wakeup was not "natural", but triggered by one of the safety
 	 * nets, assume that the CPU might have been idle for the entire sleep
......
@@ -20,16 +20,9 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
 	local_irq_enable();
 	if (!current_set_polling_and_test()) {
 		unsigned int loop_count = 0;
-		u64 limit = TICK_NSEC;
-		int i;
+		u64 limit;
 
-		for (i = 1; i < drv->state_count; i++) {
-			if (drv->states[i].disabled || dev->states_usage[i].disable)
-				continue;
-
-			limit = (u64)drv->states[i].target_residency * NSEC_PER_USEC;
-			break;
-		}
+		limit = cpuidle_poll_time(drv, dev);
 
 		while (!need_resched()) {
 			cpu_relax();
......
@@ -334,6 +334,7 @@ struct cpuidle_state_kobj {
 	struct cpuidle_state_usage *state_usage;
 	struct completion kobj_unregister;
 	struct kobject kobj;
+	struct cpuidle_device *device;
 };
 
 #ifdef CONFIG_SUSPEND
@@ -391,6 +392,7 @@ static inline void cpuidle_remove_s2idle_attr_group(struct cpuidle_state_kobj *k
 #define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj)
 #define kobj_to_state(k) (kobj_to_state_obj(k)->state)
 #define kobj_to_state_usage(k) (kobj_to_state_obj(k)->state_usage)
+#define kobj_to_device(k) (kobj_to_state_obj(k)->device)
 #define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr)
 
 static ssize_t cpuidle_state_show(struct kobject *kobj, struct attribute *attr,
@@ -414,10 +416,14 @@ static ssize_t cpuidle_state_store(struct kobject *kobj, struct attribute *attr,
 	struct cpuidle_state *state = kobj_to_state(kobj);
 	struct cpuidle_state_usage *state_usage = kobj_to_state_usage(kobj);
 	struct cpuidle_state_attr *cattr = attr_to_stateattr(attr);
+	struct cpuidle_device *dev = kobj_to_device(kobj);
 
 	if (cattr->store)
 		ret = cattr->store(state, state_usage, buf, size);
 
+	/* reset poll time cache */
+	dev->poll_limit_ns = 0;
+
 	return ret;
 }
@@ -468,6 +474,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device)
 		}
 		kobj->state = &drv->states[i];
 		kobj->state_usage = &device->states_usage[i];
+		kobj->device = device;
 		init_completion(&kobj->kobj_unregister);
 
 		ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle,
......
@@ -59,14 +59,14 @@ struct idle_inject_thread {
 /**
  * struct idle_inject_device - idle injection data
  * @timer: idle injection period timer
- * @idle_duration_ms: duration of CPU idle time to inject
- * @run_duration_ms: duration of CPU run time to allow
+ * @idle_duration_us: duration of CPU idle time to inject
+ * @run_duration_us: duration of CPU run time to allow
  * @cpumask: mask of CPUs affected by idle injection
  */
 struct idle_inject_device {
 	struct hrtimer timer;
-	unsigned int idle_duration_ms;
-	unsigned int run_duration_ms;
+	unsigned int idle_duration_us;
+	unsigned int run_duration_us;
 	unsigned long int cpumask[0];
 };
 
@@ -104,16 +104,16 @@ static void idle_inject_wakeup(struct idle_inject_device *ii_dev)
  */
 static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
 {
-	unsigned int duration_ms;
+	unsigned int duration_us;
 	struct idle_inject_device *ii_dev =
 		container_of(timer, struct idle_inject_device, timer);
 
-	duration_ms = READ_ONCE(ii_dev->run_duration_ms);
-	duration_ms += READ_ONCE(ii_dev->idle_duration_ms);
+	duration_us = READ_ONCE(ii_dev->run_duration_us);
+	duration_us += READ_ONCE(ii_dev->idle_duration_us);
 
 	idle_inject_wakeup(ii_dev);
 
-	hrtimer_forward_now(timer, ms_to_ktime(duration_ms));
+	hrtimer_forward_now(timer, ns_to_ktime(duration_us * NSEC_PER_USEC));
 
 	return HRTIMER_RESTART;
 }
@@ -138,35 +138,35 @@ static void idle_inject_fn(unsigned int cpu)
 	 */
 	iit->should_run = 0;
 
-	play_idle(READ_ONCE(ii_dev->idle_duration_ms));
+	play_idle(READ_ONCE(ii_dev->idle_duration_us));
 }
 
 /**
  * idle_inject_set_duration - idle and run duration update helper
- * @run_duration_ms: CPU run time to allow in milliseconds
- * @idle_duration_ms: CPU idle time to inject in milliseconds
+ * @run_duration_us: CPU run time to allow in microseconds
+ * @idle_duration_us: CPU idle time to inject in microseconds
  */
 void idle_inject_set_duration(struct idle_inject_device *ii_dev,
-			      unsigned int run_duration_ms,
-			      unsigned int idle_duration_ms)
+			      unsigned int run_duration_us,
+			      unsigned int idle_duration_us)
 {
-	if (run_duration_ms && idle_duration_ms) {
-		WRITE_ONCE(ii_dev->run_duration_ms, run_duration_ms);
-		WRITE_ONCE(ii_dev->idle_duration_ms, idle_duration_ms);
+	if (run_duration_us && idle_duration_us) {
+		WRITE_ONCE(ii_dev->run_duration_us, run_duration_us);
+		WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us);
 	}
 }
 
 /**
  * idle_inject_get_duration - idle and run duration retrieval helper
- * @run_duration_ms: memory location to store the current CPU run time
- * @idle_duration_ms: memory location to store the current CPU idle time
+ * @run_duration_us: memory location to store the current CPU run time
+ * @idle_duration_us: memory location to store the current CPU idle time
 */
 void idle_inject_get_duration(struct idle_inject_device *ii_dev,
-			      unsigned int *run_duration_ms,
-			      unsigned int *idle_duration_ms)
+			      unsigned int *run_duration_us,
+			      unsigned int *idle_duration_us)
 {
-	*run_duration_ms = READ_ONCE(ii_dev->run_duration_ms);
-	*idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms);
+	*run_duration_us = READ_ONCE(ii_dev->run_duration_us);
+	*idle_duration_us = READ_ONCE(ii_dev->idle_duration_us);
 }
 
 /**
@@ -181,10 +181,10 @@ void idle_inject_get_duration(struct idle_inject_device *ii_dev,
  */
 int idle_inject_start(struct idle_inject_device *ii_dev)
 {
-	unsigned int idle_duration_ms = READ_ONCE(ii_dev->idle_duration_ms);
-	unsigned int run_duration_ms = READ_ONCE(ii_dev->run_duration_ms);
+	unsigned int idle_duration_us = READ_ONCE(ii_dev->idle_duration_us);
+	unsigned int run_duration_us = READ_ONCE(ii_dev->run_duration_us);
 
-	if (!idle_duration_ms || !run_duration_ms)
+	if (!idle_duration_us || !run_duration_us)
 		return -EINVAL;
 
 	pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n",
@@ -193,7 +193,8 @@ int idle_inject_start(struct idle_inject_device *ii_dev)
 	idle_inject_wakeup(ii_dev);
 
 	hrtimer_start(&ii_dev->timer,
-		      ms_to_ktime(idle_duration_ms + run_duration_ms),
+		      ns_to_ktime((idle_duration_us + run_duration_us) *
+				  NSEC_PER_USEC),
 		      HRTIMER_MODE_REL);
 
 	return 0;
......
@@ -430,7 +430,7 @@ static void clamp_idle_injection_func(struct kthread_work *work)
 	if (should_skip)
 		goto balance;
 
-	play_idle(jiffies_to_msecs(w_data->duration_jiffies));
+	play_idle(jiffies_to_usecs(w_data->duration_jiffies));
 
 balance:
 	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
......
@@ -179,7 +179,7 @@ void arch_cpu_idle_dead(void);
 int cpu_report_state(int cpu);
 int cpu_check_up_prepare(int cpu);
 void cpu_set_state_online(int cpu);
-void play_idle(unsigned long duration_ms);
+void play_idle(unsigned long duration_us);
 
 #ifdef CONFIG_HOTPLUG_CPU
 bool cpu_wait_death(unsigned int cpu, int seconds);
......
@@ -85,7 +85,9 @@ struct cpuidle_device {
 	unsigned int		cpu;
 	ktime_t			next_hrtimer;
 
+	int			last_state_idx;
 	int			last_residency;
+	u64			poll_limit_ns;
 	struct cpuidle_state_usage	states_usage[CPUIDLE_STATE_MAX];
 	struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
 	struct cpuidle_driver_kobj *kobj_driver;
@@ -119,6 +121,9 @@ struct cpuidle_driver {
 	/* the driver handles the cpus in cpumask */
 	struct cpumask		*cpumask;
+
+	/* preferred governor to switch at register time */
+	const char		*governor;
 };
 
 #ifdef CONFIG_CPU_IDLE
@@ -132,6 +137,8 @@ extern int cpuidle_select(struct cpuidle_driver *drv,
 extern int cpuidle_enter(struct cpuidle_driver *drv,
 			 struct cpuidle_device *dev, int index);
 extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
+extern u64 cpuidle_poll_time(struct cpuidle_driver *drv,
+			     struct cpuidle_device *dev);
 
 extern int cpuidle_register_driver(struct cpuidle_driver *drv);
 extern struct cpuidle_driver *cpuidle_get_driver(void);
@@ -166,6 +173,9 @@ static inline int cpuidle_enter(struct cpuidle_driver *drv,
 				struct cpuidle_device *dev, int index)
 {return -ENODEV; }
 static inline void cpuidle_reflect(struct cpuidle_device *dev, int index) { }
+static inline u64 cpuidle_poll_time(struct cpuidle_driver *drv,
+				    struct cpuidle_device *dev)
+{return 0; }
 static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
 {return -ENODEV; }
 static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; }
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _CPUIDLE_HALTPOLL_H
#define _CPUIDLE_HALTPOLL_H
#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
#include <asm/cpuidle_haltpoll.h>
#else
static inline void arch_haltpoll_enable(unsigned int cpu)
{
}
static inline void arch_haltpoll_disable(unsigned int cpu)
{
}
#endif
#endif
@@ -20,10 +20,10 @@ int idle_inject_start(struct idle_inject_device *ii_dev);
 void idle_inject_stop(struct idle_inject_device *ii_dev);
 
 void idle_inject_set_duration(struct idle_inject_device *ii_dev,
-			      unsigned int run_duration_ms,
-			      unsigned int idle_duration_ms);
+			      unsigned int run_duration_us,
+			      unsigned int idle_duration_us);
 
 void idle_inject_get_duration(struct idle_inject_device *ii_dev,
-			      unsigned int *run_duration_ms,
-			      unsigned int *idle_duration_ms);
+			      unsigned int *run_duration_us,
+			      unsigned int *idle_duration_us);
 
 #endif /* __IDLE_INJECT_H__ */
@@ -311,7 +311,7 @@ static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-void play_idle(unsigned long duration_ms)
+void play_idle(unsigned long duration_us)
 {
 	struct idle_timer it;
 
@@ -323,7 +323,7 @@ void play_idle(unsigned long duration_ms)
 	WARN_ON_ONCE(current->nr_cpus_allowed != 1);
 	WARN_ON_ONCE(!(current->flags & PF_KTHREAD));
 	WARN_ON_ONCE(!(current->flags & PF_NO_SETAFFINITY));
-	WARN_ON_ONCE(!duration_ms);
+	WARN_ON_ONCE(!duration_us);
 
 	rcu_sleep_check();
 	preempt_disable();
@@ -333,7 +333,8 @@ void play_idle(unsigned long duration_ms)
 	it.done = 0;
 	hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	it.timer.function = idle_inject_timer_fn;
-	hrtimer_start(&it.timer, ms_to_ktime(duration_ms), HRTIMER_MODE_REL_PINNED);
+	hrtimer_start(&it.timer, ns_to_ktime(duration_us * NSEC_PER_USEC),
+		      HRTIMER_MODE_REL_PINNED);
 
 	while (!READ_ONCE(it.done))
 		do_idle();
......