Commit 51798dea authored by Rafael J. Wysocki

Merge branches 'pm-cpuidle' and 'pm-qos'

* pm-cpuidle:
  tick-sched: avoid a maybe-uninitialized warning
  cpuidle: Add definition of residency to sysfs documentation
  time: hrtimer: Use timerqueue_iterate_next() to get to the next timer
  nohz: Avoid duplication of code related to got_idle_tick
  nohz: Gather tick_sched booleans under a common flag field
  cpuidle: menu: Avoid selecting shallow states with stopped tick
  cpuidle: menu: Refine idle state selection for running tick
  sched: idle: Select idle state before stopping the tick
  time: hrtimer: Introduce hrtimer_next_event_without()
  time: tick-sched: Split tick_nohz_stop_sched_tick()
  cpuidle: Return nohz hint from cpuidle_select()
  jiffies: Introduce USER_TICK_USEC and redefine TICK_USEC
  sched: idle: Do not stop the tick before cpuidle_idle_call()
  sched: idle: Do not stop the tick upfront in the idle loop
  time: tick-sched: Reorganize idle tick management code

* pm-qos:
  PM / QoS: mark expected switch fall-throughs
--- a/Documentation/cpuidle/sysfs.txt
+++ b/Documentation/cpuidle/sysfs.txt
@@ -40,6 +40,7 @@ total 0
 -r--r--r-- 1 root root 4096 Feb 8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb 8 10:42 name
 -r--r--r-- 1 root root 4096 Feb 8 10:42 power
+-r--r--r-- 1 root root 4096 Feb 8 10:42 residency
 -r--r--r-- 1 root root 4096 Feb 8 10:42 time
 -r--r--r-- 1 root root 4096 Feb 8 10:42 usage
@@ -50,6 +51,7 @@ total 0
 -r--r--r-- 1 root root 4096 Feb 8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb 8 10:42 name
 -r--r--r-- 1 root root 4096 Feb 8 10:42 power
+-r--r--r-- 1 root root 4096 Feb 8 10:42 residency
 -r--r--r-- 1 root root 4096 Feb 8 10:42 time
 -r--r--r-- 1 root root 4096 Feb 8 10:42 usage
@@ -60,6 +62,7 @@ total 0
 -r--r--r-- 1 root root 4096 Feb 8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb 8 10:42 name
 -r--r--r-- 1 root root 4096 Feb 8 10:42 power
+-r--r--r-- 1 root root 4096 Feb 8 10:42 residency
 -r--r--r-- 1 root root 4096 Feb 8 10:42 time
 -r--r--r-- 1 root root 4096 Feb 8 10:42 usage
@@ -70,6 +73,7 @@ total 0
 -r--r--r-- 1 root root 4096 Feb 8 10:42 latency
 -r--r--r-- 1 root root 4096 Feb 8 10:42 name
 -r--r--r-- 1 root root 4096 Feb 8 10:42 power
+-r--r--r-- 1 root root 4096 Feb 8 10:42 residency
 -r--r--r-- 1 root root 4096 Feb 8 10:42 time
 -r--r--r-- 1 root root 4096 Feb 8 10:42 usage
 --------------------------------------------------------------------------------
@@ -78,6 +82,8 @@ total 0
 * desc : Small description about the idle state (string)
 * disable : Option to disable this idle state (bool) -> see note below
 * latency : Latency to exit out of this idle state (in microseconds)
+* residency : Time after which a state becomes more efficient than any
+              shallower state (in microseconds)
 * name : Name of the idle state (string)
 * power : Power consumed while in this idle state (in milliwatts)
 * time : Total time spent in this idle state (in microseconds)
......
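The new residency attribute is a read-only integer like its siblings. As a quick illustration (not part of the commit), a user-space reader might look like the sketch below; the cpu0/state1 path is an assumption based on the layout shown above.

#include <stdio.h>

int main(void)
{
        unsigned long residency_us;
        /* Assumes state1 exists on cpu0; deeper states have larger values. */
        FILE *f = fopen("/sys/devices/system/cpu/cpu0/cpuidle/state1/residency", "r");

        if (!f)
                return 1;
        if (fscanf(f, "%lu", &residency_us) == 1)
                printf("state1 residency: %lu us\n", residency_us);
        fclose(f);
        return 0;
}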
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -425,6 +425,7 @@ static void xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
         * data back is to call:
         */
        tick_nohz_idle_enter();
+       tick_nohz_idle_stop_tick_protected();
        cpuhp_online_idle(CPUHP_AP_ONLINE_IDLE);
 }
......
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -272,12 +272,18 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
  *
  * @drv: the cpuidle driver
  * @dev: the cpuidle device
+ * @stop_tick: indication on whether or not to stop the tick
  *
  * Returns the index of the idle state. The return value must not be negative.
+ *
+ * The memory location pointed to by @stop_tick is expected to be set to
+ * 'false' if the scheduler tick should not be stopped before entering the
+ * returned state.
  */
-int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+                  bool *stop_tick)
 {
-       return cpuidle_curr_governor->select(drv, dev);
+       return cpuidle_curr_governor->select(drv, dev, stop_tick);
 }

 /**
......
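To make the new contract concrete, here is a hypothetical governor ->select() callback, a minimal sketch rather than kernel code: the caller initializes *stop_tick to true, and the governor only clears it when the tick should be retained. The state walk and the TICK_USEC threshold below are invented for illustration.

/* Hypothetical governor sketch, not from this commit. */
static int toy_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
                      bool *stop_tick)
{
        int i;

        /* Pick the deepest enabled state; fall back to state 0. */
        for (i = drv->state_count - 1; i > 0; i--) {
                if (!drv->states[i].disabled && !dev->states_usage[i].disable)
                        break;
        }

        /* Retain the tick for shallow states so the CPU cannot oversleep. */
        if (drv->states[i].target_residency < TICK_USEC)
                *stop_tick = false;

        return i;
}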
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -63,9 +63,10 @@ static inline void ladder_do_selection(struct ladder_device *ldev,
  * ladder_select_state - selects the next state to enter
  * @drv: cpuidle driver
  * @dev: the CPU
+ * @dummy: not used
  */
 static int ladder_select_state(struct cpuidle_driver *drv,
-                              struct cpuidle_device *dev)
+                              struct cpuidle_device *dev, bool *dummy)
 {
        struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
        struct device *device = get_cpu_device(dev->cpu);
......
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -123,6 +123,7 @@
 struct menu_device {
        int             last_state_idx;
        int             needs_update;
+       int             tick_wakeup;

        unsigned int    next_timer_us;
        unsigned int    predicted_us;
@@ -279,8 +280,10 @@ static unsigned int get_typical_interval(struct menu_device *data)
  * menu_select - selects the next idle state to enter
  * @drv: cpuidle driver containing state data
  * @dev: the CPU
+ * @stop_tick: indication on whether or not to stop the tick
  */
-static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+                      bool *stop_tick)
 {
        struct menu_device *data = this_cpu_ptr(&menu_devices);
        struct device *device = get_cpu_device(dev->cpu);
@@ -292,6 +295,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
        unsigned int expected_interval;
        unsigned long nr_iowaiters, cpu_load;
        int resume_latency = dev_pm_qos_raw_read_value(device);
+       ktime_t delta_next;

        if (data->needs_update) {
                menu_update(drv, dev);
@@ -303,11 +307,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
                latency_req = resume_latency;

        /* Special case when user has set very strict latency requirement */
-       if (unlikely(latency_req == 0))
+       if (unlikely(latency_req == 0)) {
+               *stop_tick = false;
                return 0;
+       }

        /* determine the expected residency time, round up */
-       data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length());
+       data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));

        get_iowait_load(&nr_iowaiters, &cpu_load);
        data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
@@ -346,6 +352,20 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
         */
        data->predicted_us = min(data->predicted_us, expected_interval);

+       if (tick_nohz_tick_stopped()) {
+               /*
+                * If the tick is already stopped, the cost of possible short
+                * idle duration misprediction is much higher, because the CPU
+                * may be stuck in a shallow idle state for a long time as a
+                * result of it.  In that case say we might mispredict and try
+                * to force the CPU into a state for which we would have stopped
+                * the tick, unless a timer is going to expire really soon
+                * anyway.
+                */
+               if (data->predicted_us < TICK_USEC)
+                       data->predicted_us = min_t(unsigned int, TICK_USEC,
+                                                  ktime_to_us(delta_next));
+       } else {
                /*
                 * Use the performance multiplier and the user-configurable
                 * latency_req to determine the maximum exit latency.
@@ -353,7 +373,9 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
                interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
                if (latency_req > interactivity_req)
                        latency_req = interactivity_req;
+       }

+       expected_interval = data->predicted_us;
        /*
         * Find the idle state with the lowest power while satisfying
         * our constraints.
@@ -369,15 +391,52 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
                        idx = i; /* first enabled state */
                if (s->target_residency > data->predicted_us)
                        break;
-               if (s->exit_latency > latency_req)
+               if (s->exit_latency > latency_req) {
+                       /*
+                        * If we break out of the loop for latency reasons, use
+                        * the target residency of the selected state as the
+                        * expected idle duration so that the tick is retained
+                        * as long as that target residency is low enough.
+                        */
+                       expected_interval = drv->states[idx].target_residency;
                        break;
+               }
                idx = i;
        }

        if (idx == -1)
                idx = 0; /* No states enabled. Must use 0. */

+       /*
+        * Don't stop the tick if the selected state is a polling one or if the
+        * expected idle duration is shorter than the tick period length.
+        */
+       if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
+           expected_interval < TICK_USEC) {
+               unsigned int delta_next_us = ktime_to_us(delta_next);
+
+               *stop_tick = false;
+
+               if (!tick_nohz_tick_stopped() && idx > 0 &&
+                   drv->states[idx].target_residency > delta_next_us) {
+                       /*
+                        * The tick is not going to be stopped and the target
+                        * residency of the state to be returned is not within
+                        * the time until the next timer event including the
+                        * tick, so try to correct that.
+                        */
+                       for (i = idx - 1; i >= 0; i--) {
+                               if (drv->states[i].disabled ||
+                                   dev->states_usage[i].disable)
+                                       continue;
+
+                               idx = i;
+                               if (drv->states[i].target_residency <= delta_next_us)
+                                       break;
+                       }
+               }
+       }
+
        data->last_state_idx = idx;

        return data->last_state_idx;
@@ -397,6 +456,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index)

        data->last_state_idx = index;
        data->needs_update = 1;
+       data->tick_wakeup = tick_nohz_idle_got_tick();
 }

 /**
@@ -427,6 +487,18 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
         * assume the state was never reached and the exit latency is 0.
         */

+       if (data->tick_wakeup && data->next_timer_us > TICK_USEC) {
+               /*
+                * The nohz code said that there wouldn't be any events within
+                * the tick boundary (if the tick was stopped), but the idle
+                * duration predictor had a differing opinion.  Since the CPU
+                * was woken up by a tick (that wasn't stopped after all), the
+                * predictor was not quite right, so assume that the CPU could
+                * have been idle long (but not forever) to help the idle
+                * duration predictor do a better job next time.
+                */
+               measured_us = 9 * MAX_INTERESTING / 10;
+       } else {
                /* measured value */
                measured_us = cpuidle_get_last_residency(dev);
@@ -435,6 +507,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
                        measured_us -= target->exit_latency;
                else
                        measured_us /= 2;
+       }

        /* Make sure our coefficients do not exceed unity */
        if (measured_us > data->next_timer_us)
......
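A worked example of the new tick logic may help; the numbers below are assumed (HZ=1000, so TICK_USEC is 1000), not taken from the commit.

/*
 * Worked example with assumed numbers (HZ=1000 -> TICK_USEC = 1000):
 *
 *   predicted_us = 300, selected state has target_residency = 20 us
 *     -> expected_interval (300) < TICK_USEC (1000), so *stop_tick = false
 *        and the still-running tick bounds how long the CPU can linger in
 *        that shallow state.
 *
 *   predicted_us = 5000, selected state has target_residency = 2000 us
 *     -> expected_interval >= TICK_USEC, so *stop_tick stays true and the
 *        tick is stopped before entering the deep state.
 */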
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -375,7 +375,7 @@ static int efx_mcdi_poll(struct efx_nic *efx)
         * because generally mcdi responses are fast. After that, back off
         * and poll once a jiffy (approximately)
         */
-       spins = TICK_USEC;
+       spins = USER_TICK_USEC;
        finish = jiffies + MCDI_RPC_TIMEOUT;

        while (1) {
......
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -135,7 +135,8 @@ extern bool cpuidle_not_available(struct cpuidle_driver *drv,
                                  struct cpuidle_device *dev);
 extern int cpuidle_select(struct cpuidle_driver *drv,
-                         struct cpuidle_device *dev);
+                         struct cpuidle_device *dev,
+                         bool *stop_tick);
 extern int cpuidle_enter(struct cpuidle_driver *drv,
                         struct cpuidle_device *dev, int index);
 extern void cpuidle_reflect(struct cpuidle_device *dev, int index);
@@ -167,7 +168,7 @@ static inline bool cpuidle_not_available(struct cpuidle_driver *drv,
                                         struct cpuidle_device *dev)
 {return true; }
 static inline int cpuidle_select(struct cpuidle_driver *drv,
-                                struct cpuidle_device *dev)
+                                struct cpuidle_device *dev, bool *stop_tick)
 {return -ENODEV; }
 static inline int cpuidle_enter(struct cpuidle_driver *drv,
                                struct cpuidle_device *dev, int index)
@@ -250,7 +251,8 @@ struct cpuidle_governor {
                                        struct cpuidle_device *dev);
        int  (*select)          (struct cpuidle_driver *drv,
-                                struct cpuidle_device *dev);
+                                struct cpuidle_device *dev,
+                                bool *stop_tick);
        void (*reflect)         (struct cpuidle_device *dev, int index);
 };
......
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -424,6 +424,7 @@ static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
 }

 extern u64 hrtimer_get_next_event(void);
+extern u64 hrtimer_next_event_without(const struct hrtimer *exclude);

 extern bool hrtimer_active(const struct hrtimer *timer);
......
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -62,8 +62,11 @@ extern int register_refined_jiffies(long clock_tick_rate);
 /* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */
 #define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ)

-/* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
-#define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
+/* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */
+#define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ)
+
+/* USER_TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
+#define USER_TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)

 #ifndef __jiffy_arch_data
 #define __jiffy_arch_data
......
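The practical effect of the redefinition is easiest to see with concrete numbers; HZ=250 and USER_HZ=100 below are assumptions typical of x86 configurations.

/*
 * Example values assuming HZ=250 and USER_HZ=100:
 *
 *   TICK_USEC      = (1000000 + 250/2) / 250 = 4000 us   (real tick period)
 *   USER_TICK_USEC = (1000000 + 100/2) / 100 = 10000 us  (USER_HZ period)
 *
 * Before this change TICK_USEC had the USER_HZ meaning, which is why the
 * sfc driver above was switched to USER_TICK_USEC to keep its behavior.
 */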
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -115,27 +115,46 @@ enum tick_dep_bits {
 extern bool tick_nohz_enabled;
 extern bool tick_nohz_tick_stopped(void);
 extern bool tick_nohz_tick_stopped_cpu(int cpu);
+extern void tick_nohz_idle_stop_tick(void);
+extern void tick_nohz_idle_retain_tick(void);
+extern void tick_nohz_idle_restart_tick(void);
 extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
-extern ktime_t tick_nohz_get_sleep_length(void);
+extern bool tick_nohz_idle_got_tick(void);
+extern ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next);
 extern unsigned long tick_nohz_get_idle_calls(void);
 extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
+
+static inline void tick_nohz_idle_stop_tick_protected(void)
+{
+       local_irq_disable();
+       tick_nohz_idle_stop_tick();
+       local_irq_enable();
+}
+
 #else /* !CONFIG_NO_HZ_COMMON */
 #define tick_nohz_enabled (0)
 static inline int tick_nohz_tick_stopped(void) { return 0; }
 static inline int tick_nohz_tick_stopped_cpu(int cpu) { return 0; }
+static inline void tick_nohz_idle_stop_tick(void) { }
+static inline void tick_nohz_idle_retain_tick(void) { }
+static inline void tick_nohz_idle_restart_tick(void) { }
 static inline void tick_nohz_idle_enter(void) { }
 static inline void tick_nohz_idle_exit(void) { }
+static inline bool tick_nohz_idle_got_tick(void) { return false; }

-static inline ktime_t tick_nohz_get_sleep_length(void)
+static inline ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
 {
-       return NSEC_PER_SEC / HZ;
+       *delta_next = TICK_NSEC;
+       return *delta_next;
 }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
+static inline void tick_nohz_idle_stop_tick_protected(void) { }
 #endif /* !CONFIG_NO_HZ_COMMON */

 #ifdef CONFIG_NO_HZ_FULL
......
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -295,6 +295,7 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
                 * changed
                 */
                plist_del(node, &c->list);
+               /* fall through */
        case PM_QOS_ADD_REQ:
                plist_node_init(node, new_value);
                plist_add(node, &c->list);
@@ -367,6 +368,7 @@ bool pm_qos_update_flags(struct pm_qos_flags *pqf,
                break;
        case PM_QOS_UPDATE_REQ:
                pm_qos_flags_remove_req(pqf, req);
+               /* fall through */
        case PM_QOS_ADD_REQ:
                req->flags = val;
                INIT_LIST_HEAD(&req->node);
......
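These comments document that the missing break statements are intentional: an update is a removal followed by the add path. Compilers implementing -Wimplicit-fallthrough (GCC 7 and later) recognize such comments as suppression markers. A standalone sketch of the pattern, with made-up names:

#include <stdio.h>

enum req_action { REQ_ADD, REQ_UPDATE };

/* Toy example: an update removes the old entry, then falls into the add path. */
static void apply(enum req_action action)
{
        switch (action) {
        case REQ_UPDATE:
                printf("remove old entry\n");
                /* fall through */
        case REQ_ADD:
                printf("insert entry\n");
                break;
        }
}

int main(void)
{
        apply(REQ_UPDATE);      /* prints both lines */
        return 0;
}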
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -141,13 +141,15 @@ static void cpuidle_idle_call(void)
        }

        /*
-        * Tell the RCU framework we are entering an idle section,
-        * so no more rcu read side critical sections and one more
+        * The RCU framework needs to be told that we are entering an idle
+        * section, so no more rcu read side critical sections and one more
         * step to the grace period
         */
-       rcu_idle_enter();

        if (cpuidle_not_available(drv, dev)) {
+               tick_nohz_idle_stop_tick();
+               rcu_idle_enter();
+
                default_idle_call();
                goto exit_idle;
        }
@@ -164,20 +166,37 @@ static void cpuidle_idle_call(void)

        if (idle_should_enter_s2idle() || dev->use_deepest_state) {
                if (idle_should_enter_s2idle()) {
+                       rcu_idle_enter();
+
                        entered_state = cpuidle_enter_s2idle(drv, dev);
                        if (entered_state > 0) {
                                local_irq_enable();
                                goto exit_idle;
                        }
+
+                       rcu_idle_exit();
                }

+               tick_nohz_idle_stop_tick();
+               rcu_idle_enter();
+
                next_state = cpuidle_find_deepest_state(drv, dev);
                call_cpuidle(drv, dev, next_state);
        } else {
+               bool stop_tick = true;
+
                /*
                 * Ask the cpuidle framework to choose a convenient idle state.
                 */
-               next_state = cpuidle_select(drv, dev);
+               next_state = cpuidle_select(drv, dev, &stop_tick);
+
+               if (stop_tick)
+                       tick_nohz_idle_stop_tick();
+               else
+                       tick_nohz_idle_retain_tick();
+
+               rcu_idle_enter();
+
                entered_state = call_cpuidle(drv, dev, next_state);
                /*
                 * Give the governor an opportunity to reflect on the outcome
@@ -222,6 +241,7 @@ static void do_idle(void)
                rmb();

                if (cpu_is_offline(cpu)) {
+                       tick_nohz_idle_stop_tick_protected();
                        cpuhp_report_idle_dead();
                        arch_cpu_idle_dead();
                }
@@ -235,10 +255,12 @@ static void do_idle(void)
                 * broadcast device expired for us, we don't want to go deep
                 * idle as we know that the IPI is going to arrive right away.
                 */
-               if (cpu_idle_force_poll || tick_check_broadcast_expired())
+               if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
+                       tick_nohz_idle_restart_tick();
                        cpu_idle_poll();
-               else
+               } else {
                        cpuidle_idle_call();
+               }
                arch_cpu_idle_exit();
        }
......
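Putting the pieces together, the reordered idle path boils down to the following condensed sketch (a paraphrase of the diff above, not literal kernel code): the governor now runs first, and the tick decision follows from its hint.

/* Condensed sketch of the reordered idle path; error handling omitted. */
static void idle_path_sketch(struct cpuidle_driver *drv,
                             struct cpuidle_device *dev)
{
        bool stop_tick = true;
        int next_state = cpuidle_select(drv, dev, &stop_tick);

        if (stop_tick)
                tick_nohz_idle_stop_tick();     /* deep state expected */
        else
                tick_nohz_idle_retain_tick();   /* keep the tick as a safety net */

        rcu_idle_enter();
        cpuidle_enter(drv, dev, next_state);
}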
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -480,6 +480,7 @@ __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
        while ((base = __next_base((cpu_base), &(active))))

 static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
+                                        const struct hrtimer *exclude,
                                         unsigned int active,
                                         ktime_t expires_next)
 {
@@ -492,9 +493,22 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
                next = timerqueue_getnext(&base->active);
                timer = container_of(next, struct hrtimer, node);
+               if (timer == exclude) {
+                       /* Get to the next timer in the queue. */
+                       next = timerqueue_iterate_next(next);
+                       if (!next)
+                               continue;
+
+                       timer = container_of(next, struct hrtimer, node);
+               }
                expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
                if (expires < expires_next) {
                        expires_next = expires;
+
+                       /* Skip cpu_base update if a timer is being excluded. */
+                       if (exclude)
+                               continue;
+
                        if (timer->is_soft)
                                cpu_base->softirq_next_timer = timer;
                        else
@@ -538,7 +552,8 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
        if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
                active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
                cpu_base->softirq_next_timer = NULL;
-               expires_next = __hrtimer_next_event_base(cpu_base, active, KTIME_MAX);
+               expires_next = __hrtimer_next_event_base(cpu_base, NULL,
+                                                        active, KTIME_MAX);

                next_timer = cpu_base->softirq_next_timer;
        }
@@ -546,7 +561,8 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
        if (active_mask & HRTIMER_ACTIVE_HARD) {
                active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
                cpu_base->next_timer = next_timer;
-               expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
+               expires_next = __hrtimer_next_event_base(cpu_base, NULL, active,
+                                                        expires_next);
        }

        return expires_next;
@@ -1190,6 +1206,39 @@ u64 hrtimer_get_next_event(void)
        return expires;
 }

+/**
+ * hrtimer_next_event_without - time until next expiry event w/o one timer
+ * @exclude: timer to exclude
+ *
+ * Returns the next expiry time over all timers except for the @exclude one or
+ * KTIME_MAX if none of them is pending.
+ */
+u64 hrtimer_next_event_without(const struct hrtimer *exclude)
+{
+       struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
+       u64 expires = KTIME_MAX;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&cpu_base->lock, flags);
+
+       if (__hrtimer_hres_active(cpu_base)) {
+               unsigned int active;
+
+               if (!cpu_base->softirq_activated) {
+                       active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
+                       expires = __hrtimer_next_event_base(cpu_base, exclude,
+                                                           active, KTIME_MAX);
+               }
+
+               active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
+               expires = __hrtimer_next_event_base(cpu_base, exclude, active,
+                                                   expires);
+       }
+
+       raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+
+       return expires;
+}

 #endif

 static inline int hrtimer_clockid_to_base(clockid_t clock_id)
......
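The expected consumer is the nohz idle code, whose tick-sched.c diff is collapsed further down; the sketch below is therefore an assumption about the call site rather than a quote from the commit. When estimating how long a CPU may sleep, the tick's own sched_timer must not count as a wakeup event, so it is excluded.

/* Assumed call-site sketch, not taken from the (collapsed) tick-sched.c diff. */
static u64 next_wakeup_excluding_tick(struct tick_sched *ts)
{
        /* A KTIME_MAX return means no other hrtimer is pending on this CPU. */
        return hrtimer_next_event_without(&ts->sched_timer);
}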
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -31,7 +31,7 @@

 /* USER_HZ period (usecs): */
-unsigned long                  tick_usec = TICK_USEC;
+unsigned long                  tick_usec = USER_TICK_USEC;

 /* SHIFTED_HZ period (nsecs): */
 unsigned long                  tick_nsec;
......
[The kernel/time/tick-sched.c diff is collapsed in this view.]
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -38,31 +38,37 @@ enum tick_nohz_mode {
  * @idle_exittime:      Time when the idle state was left
  * @idle_sleeptime:     Sum of the time slept in idle with sched tick stopped
  * @iowait_sleeptime:   Sum of the time slept in idle with sched tick stopped, with IO outstanding
- * @sleep_length:       Duration of the current idle sleep
+ * @timer_expires:      Anticipated timer expiration time (in case sched tick is stopped)
+ * @timer_expires_base: Base time clock monotonic for @timer_expires
  * @do_timer_last:      CPU was the last one doing do_timer before going idle
+ * @got_idle_tick:      Tick timer function has run with @inidle set
  */
 struct tick_sched {
        struct hrtimer                  sched_timer;
        unsigned long                   check_clocks;
        enum tick_nohz_mode             nohz_mode;
+
+       unsigned int                    inidle          : 1;
+       unsigned int                    tick_stopped    : 1;
+       unsigned int                    idle_active     : 1;
+       unsigned int                    do_timer_last   : 1;
+       unsigned int                    got_idle_tick   : 1;
+
        ktime_t                         last_tick;
        ktime_t                         next_tick;
-       int                             inidle;
-       int                             tick_stopped;
        unsigned long                   idle_jiffies;
        unsigned long                   idle_calls;
        unsigned long                   idle_sleeps;
-       int                             idle_active;
        ktime_t                         idle_entrytime;
        ktime_t                         idle_waketime;
        ktime_t                         idle_exittime;
        ktime_t                         idle_sleeptime;
        ktime_t                         iowait_sleeptime;
-       ktime_t                         sleep_length;
        unsigned long                   last_jiffies;
+       u64                             timer_expires;
+       u64                             timer_expires_base;
        u64                             next_timer;
        ktime_t                         idle_expires;
-       int                             do_timer_last;

        atomic_t                        tick_dep_mask;
 };
......