Commit d027d45d authored by Frederic Weisbecker's avatar Frederic Weisbecker

nohz: New tick dependency mask

The tick dependency is evaluated on every IRQ and context switch. This
consists is a batch of checks which determine whether it is safe to
stop the tick or not. These checks are often split in many details:
posix cpu timers, scheduler, sched clock, perf events.... each of which
are made of smaller details: posix cpu timer involves checking process
wide timers then thread wide timers. Perf involves checking freq events
then more per cpu details.

Checking these informations asynchronously every time we update the full
dynticks state bring avoidable overhead and a messy layout.

Let's introduce instead tick dependency masks: one for system wide
dependency (unstable sched clock, freq based perf events), one for CPU
wide dependency (sched, throttling perf events), and task/signal level
dependencies (posix cpu timers). The subsystems are responsible
for setting and clearing their dependency through a set of APIs that will
take care of concurrent dependency mask modifications and kick targets
to restart the relevant CPU tick whenever needed.

This new dependency engine stays beside the old one until all subsystems
having a tick dependency are converted to it.
Suggested-by: default avatarThomas Gleixner <tglx@linutronix.de>
Suggested-by: default avatarPeter Zijlstra <peterz@infradead.org>
Reviewed-by: default avatarChris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: default avatarFrederic Weisbecker <fweisbec@gmail.com>
parent 8537bb95
...@@ -719,6 +719,10 @@ struct signal_struct { ...@@ -719,6 +719,10 @@ struct signal_struct {
/* Earliest-expiration cache. */ /* Earliest-expiration cache. */
struct task_cputime cputime_expires; struct task_cputime cputime_expires;
#ifdef CONFIG_NO_HZ_FULL
unsigned long tick_dep_mask;
#endif
struct list_head cpu_timers[3]; struct list_head cpu_timers[3];
struct pid *tty_old_pgrp; struct pid *tty_old_pgrp;
...@@ -1542,6 +1546,10 @@ struct task_struct { ...@@ -1542,6 +1546,10 @@ struct task_struct {
VTIME_SYS, VTIME_SYS,
} vtime_snap_whence; } vtime_snap_whence;
#endif #endif
#ifdef CONFIG_NO_HZ_FULL
unsigned long tick_dep_mask;
#endif
unsigned long nvcsw, nivcsw; /* context switch counts */ unsigned long nvcsw, nivcsw; /* context switch counts */
u64 start_time; /* monotonic time in nsec */ u64 start_time; /* monotonic time in nsec */
u64 real_start_time; /* boot based time in nsec */ u64 real_start_time; /* boot based time in nsec */
......
...@@ -97,6 +97,18 @@ static inline void tick_broadcast_exit(void) ...@@ -97,6 +97,18 @@ static inline void tick_broadcast_exit(void)
tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT); tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
} }
enum tick_dep_bits {
TICK_DEP_BIT_POSIX_TIMER = 0,
TICK_DEP_BIT_PERF_EVENTS = 1,
TICK_DEP_BIT_SCHED = 2,
TICK_DEP_BIT_CLOCK_UNSTABLE = 3
};
#define TICK_DEP_MASK_POSIX_TIMER (1 << TICK_DEP_BIT_POSIX_TIMER)
#define TICK_DEP_MASK_PERF_EVENTS (1 << TICK_DEP_BIT_PERF_EVENTS)
#define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED)
#define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
#ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_NO_HZ_COMMON
extern int tick_nohz_enabled; extern int tick_nohz_enabled;
extern int tick_nohz_tick_stopped(void); extern int tick_nohz_tick_stopped(void);
...@@ -154,6 +166,72 @@ static inline int housekeeping_any_cpu(void) ...@@ -154,6 +166,72 @@ static inline int housekeeping_any_cpu(void)
return cpumask_any_and(housekeeping_mask, cpu_online_mask); return cpumask_any_and(housekeeping_mask, cpu_online_mask);
} }
extern void tick_nohz_dep_set(enum tick_dep_bits bit);
extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
extern void tick_nohz_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit);
/*
* The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
* on top of static keys.
*/
static inline void tick_dep_set(enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_set(bit);
}
static inline void tick_dep_clear(enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_clear(bit);
}
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
{
if (tick_nohz_full_cpu(cpu))
tick_nohz_dep_set_cpu(cpu, bit);
}
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
if (tick_nohz_full_cpu(cpu))
tick_nohz_dep_clear_cpu(cpu, bit);
}
static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_set_task(tsk, bit);
}
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_clear_task(tsk, bit);
}
static inline void tick_dep_set_signal(struct signal_struct *signal,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_set_signal(signal, bit);
}
static inline void tick_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_clear_signal(signal, bit);
}
extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick(void);
extern void tick_nohz_full_kick_cpu(int cpu); extern void tick_nohz_full_kick_cpu(int cpu);
extern void tick_nohz_full_kick_all(void); extern void tick_nohz_full_kick_all(void);
...@@ -166,6 +244,20 @@ static inline int housekeeping_any_cpu(void) ...@@ -166,6 +244,20 @@ static inline int housekeeping_any_cpu(void)
static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_enabled(void) { return false; }
static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; }
static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
static inline void tick_dep_set(enum tick_dep_bits bit) { }
static inline void tick_dep_clear(enum tick_dep_bits bit) { }
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_set_signal(struct signal_struct *signal,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit) { }
static inline void tick_nohz_full_kick_cpu(int cpu) { } static inline void tick_nohz_full_kick_cpu(int cpu) { }
static inline void tick_nohz_full_kick(void) { } static inline void tick_nohz_full_kick(void) { }
static inline void tick_nohz_full_kick_all(void) { } static inline void tick_nohz_full_kick_all(void) { }
......
...@@ -158,11 +158,53 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) ...@@ -158,11 +158,53 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
cpumask_var_t tick_nohz_full_mask; cpumask_var_t tick_nohz_full_mask;
cpumask_var_t housekeeping_mask; cpumask_var_t housekeeping_mask;
bool tick_nohz_full_running; bool tick_nohz_full_running;
static unsigned long tick_dep_mask;
static bool can_stop_full_tick(void) static void trace_tick_dependency(unsigned long dep)
{
if (dep & TICK_DEP_MASK_POSIX_TIMER) {
trace_tick_stop(0, "posix timers running\n");
return;
}
if (dep & TICK_DEP_MASK_PERF_EVENTS) {
trace_tick_stop(0, "perf events running\n");
return;
}
if (dep & TICK_DEP_MASK_SCHED) {
trace_tick_stop(0, "more than 1 task in runqueue\n");
return;
}
if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE)
trace_tick_stop(0, "unstable sched clock\n");
}
static bool can_stop_full_tick(struct tick_sched *ts)
{ {
WARN_ON_ONCE(!irqs_disabled()); WARN_ON_ONCE(!irqs_disabled());
if (tick_dep_mask) {
trace_tick_dependency(tick_dep_mask);
return false;
}
if (ts->tick_dep_mask) {
trace_tick_dependency(ts->tick_dep_mask);
return false;
}
if (current->tick_dep_mask) {
trace_tick_dependency(current->tick_dep_mask);
return false;
}
if (current->signal->tick_dep_mask) {
trace_tick_dependency(current->signal->tick_dep_mask);
return false;
}
if (!sched_can_stop_tick()) { if (!sched_can_stop_tick()) {
trace_tick_stop(0, "more than 1 task in runqueue\n"); trace_tick_stop(0, "more than 1 task in runqueue\n");
return false; return false;
...@@ -178,9 +220,10 @@ static bool can_stop_full_tick(void) ...@@ -178,9 +220,10 @@ static bool can_stop_full_tick(void)
return false; return false;
} }
/* sched_clock_tick() needs us? */
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/* /*
* sched_clock_tick() needs us?
*
* TODO: kick full dynticks CPUs when * TODO: kick full dynticks CPUs when
* sched_clock_stable is set. * sched_clock_stable is set.
*/ */
...@@ -199,13 +242,13 @@ static bool can_stop_full_tick(void) ...@@ -199,13 +242,13 @@ static bool can_stop_full_tick(void)
return true; return true;
} }
static void nohz_full_kick_work_func(struct irq_work *work) static void nohz_full_kick_func(struct irq_work *work)
{ {
/* Empty, the tick restart happens on tick_nohz_irq_exit() */ /* Empty, the tick restart happens on tick_nohz_irq_exit() */
} }
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
.func = nohz_full_kick_work_func, .func = nohz_full_kick_func,
}; };
/* /*
...@@ -251,6 +294,95 @@ void tick_nohz_full_kick_all(void) ...@@ -251,6 +294,95 @@ void tick_nohz_full_kick_all(void)
preempt_enable(); preempt_enable();
} }
static void tick_nohz_dep_set_all(unsigned long *dep,
enum tick_dep_bits bit)
{
unsigned long prev;
prev = fetch_or(dep, BIT_MASK(bit));
if (!prev)
tick_nohz_full_kick_all();
}
/*
* Set a global tick dependency. Used by perf events that rely on freq and
* by unstable clock.
*/
void tick_nohz_dep_set(enum tick_dep_bits bit)
{
tick_nohz_dep_set_all(&tick_dep_mask, bit);
}
void tick_nohz_dep_clear(enum tick_dep_bits bit)
{
clear_bit(bit, &tick_dep_mask);
}
/*
* Set per-CPU tick dependency. Used by scheduler and perf events in order to
* manage events throttling.
*/
void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
{
unsigned long prev;
struct tick_sched *ts;
ts = per_cpu_ptr(&tick_cpu_sched, cpu);
prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit));
if (!prev) {
preempt_disable();
/* Perf needs local kick that is NMI safe */
if (cpu == smp_processor_id()) {
tick_nohz_full_kick();
} else {
/* Remote irq work not NMI-safe */
if (!WARN_ON_ONCE(in_nmi()))
tick_nohz_full_kick_cpu(cpu);
}
preempt_enable();
}
}
void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
clear_bit(bit, &ts->tick_dep_mask);
}
/*
* Set a per-task tick dependency. Posix CPU timers need this in order to elapse
* per task timers.
*/
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
/*
* We could optimize this with just kicking the target running the task
* if that noise matters for nohz full users.
*/
tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
}
void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
clear_bit(bit, &tsk->tick_dep_mask);
}
/*
* Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
* per process timers.
*/
void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
{
tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
}
void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
{
clear_bit(bit, &sig->tick_dep_mask);
}
/* /*
* Re-evaluate the need for the tick as we switch the current task. * Re-evaluate the need for the tick as we switch the current task.
* It might need the tick due to per task/process properties: * It might need the tick due to per task/process properties:
...@@ -259,15 +391,19 @@ void tick_nohz_full_kick_all(void) ...@@ -259,15 +391,19 @@ void tick_nohz_full_kick_all(void)
void __tick_nohz_task_switch(void) void __tick_nohz_task_switch(void)
{ {
unsigned long flags; unsigned long flags;
struct tick_sched *ts;
local_irq_save(flags); local_irq_save(flags);
if (!tick_nohz_full_cpu(smp_processor_id())) if (!tick_nohz_full_cpu(smp_processor_id()))
goto out; goto out;
if (tick_nohz_tick_stopped() && !can_stop_full_tick()) ts = this_cpu_ptr(&tick_cpu_sched);
tick_nohz_full_kick();
if (ts->tick_stopped) {
if (current->tick_dep_mask || current->signal->tick_dep_mask)
tick_nohz_full_kick();
}
out: out:
local_irq_restore(flags); local_irq_restore(flags);
} }
...@@ -736,7 +872,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) ...@@ -736,7 +872,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE) if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
return; return;
if (can_stop_full_tick()) if (can_stop_full_tick(ts))
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
else if (ts->tick_stopped) else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, ktime_get(), 1); tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
......
...@@ -60,6 +60,7 @@ struct tick_sched { ...@@ -60,6 +60,7 @@ struct tick_sched {
u64 next_timer; u64 next_timer;
ktime_t idle_expires; ktime_t idle_expires;
int do_timer_last; int do_timer_last;
unsigned long tick_dep_mask;
}; };
extern struct tick_sched *tick_get_tick_sched(int cpu); extern struct tick_sched *tick_get_tick_sched(int cpu);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment