Commit 1f251846 authored by Ingo Molnar

Merge branch 'timers/core-v9' of...

Merge branch 'timers/core-v9' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/nohz

Pull nohz enhancements from Frederic Weisbecker:

"Currently in nohz full configs, the tick dependency is checked
 asynchronously by nohz code from interrupt and context switch for each
 concerned subsystem with a set of functions provided by these subsystems.
 Such functions are made of many conditions and details that can be
 heavyweight as they are called on the fastpath: sched_can_stop_tick(),
 posix_cpu_timers_can_stop_tick(), perf_event_can_stop_tick()...

 Thomas suggested a few months ago to make that tick dependency check
 synchronous. Instead of checking subsystem details from each interrupt
 to guess if the tick can be stopped, every subsystem that may have a tick
 dependency should itself set a flag specifying the state of that
 dependency. This way we can verify if we can stop the tick with a single
 lightweight mask check on the fast path.

 This conversion from a pull to a push model to implement tick dependency
 is the core feature of this patchset that is split into:

  * Nohz wide kick simplification
  * Improve nohz tracing
  * Introduce tick dependency mask
  * Migrate scheduler, posix timers, perf events and sched clock tick
    dependencies to the tick dependency mask."
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents e2857b8f 4f49b90a
...@@ -548,6 +548,27 @@ static inline int atomic_dec_if_positive(atomic_t *v) ...@@ -548,6 +548,27 @@ static inline int atomic_dec_if_positive(atomic_t *v)
} }
#endif #endif
/**
 * fetch_or - perform *ptr |= mask and return old value of *ptr
 * @ptr: pointer to value
 * @mask: mask to OR on the value
 *
 * cmpxchg based fetch_or, macro so it works for different integer types.
 *
 * @ptr and @mask are each evaluated exactly once, so arguments with side
 * effects (e.g. fetch_or(p++, m)) behave as they would for a function call.
 */
#ifndef fetch_or
#define fetch_or(ptr, mask)						\
({									\
	typeof(ptr) _ptr = (ptr);					\
	typeof(mask) _mask = (mask);					\
	typeof(*_ptr) _old, _val = *_ptr;				\
									\
	/* Retry until no other writer changed *_ptr under us. */	\
	for (;;) {							\
		_old = cmpxchg(_ptr, _val, _val | _mask);		\
		if (_old == _val)					\
			break;						\
		_val = _old;						\
	}								\
	_old;								\
})
#endif
#ifdef CONFIG_GENERIC_ATOMIC64 #ifdef CONFIG_GENERIC_ATOMIC64
#include <asm-generic/atomic64.h> #include <asm-generic/atomic64.h>
#endif #endif
......
...@@ -1109,12 +1109,6 @@ static inline void perf_event_task_tick(void) { } ...@@ -1109,12 +1109,6 @@ static inline void perf_event_task_tick(void) { }
static inline int perf_event_release_kernel(struct perf_event *event) { return 0; } static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
#endif #endif
/*
 * perf_event_can_stop_tick() is only implemented out of line when both
 * CONFIG_PERF_EVENTS and CONFIG_NO_HZ_FULL are set; otherwise the stub
 * always reports that the tick can be stopped.
 */
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
extern bool perf_event_can_stop_tick(void);
#else
static inline bool perf_event_can_stop_tick(void) { return true; }
#endif
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
extern void perf_restore_debug_store(void); extern void perf_restore_debug_store(void);
#else #else
......
...@@ -128,9 +128,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer); ...@@ -128,9 +128,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer);
void run_posix_cpu_timers(struct task_struct *task); void run_posix_cpu_timers(struct task_struct *task);
void posix_cpu_timers_exit(struct task_struct *task); void posix_cpu_timers_exit(struct task_struct *task);
void posix_cpu_timers_exit_group(struct task_struct *task); void posix_cpu_timers_exit_group(struct task_struct *task);
bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk);
void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx, void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
cputime_t *newval, cputime_t *oldval); cputime_t *newval, cputime_t *oldval);
......
...@@ -719,6 +719,10 @@ struct signal_struct { ...@@ -719,6 +719,10 @@ struct signal_struct {
/* Earliest-expiration cache. */ /* Earliest-expiration cache. */
struct task_cputime cputime_expires; struct task_cputime cputime_expires;
#ifdef CONFIG_NO_HZ_FULL
unsigned long tick_dep_mask;
#endif
struct list_head cpu_timers[3]; struct list_head cpu_timers[3];
struct pid *tty_old_pgrp; struct pid *tty_old_pgrp;
...@@ -1542,6 +1546,10 @@ struct task_struct { ...@@ -1542,6 +1546,10 @@ struct task_struct {
VTIME_SYS, VTIME_SYS,
} vtime_snap_whence; } vtime_snap_whence;
#endif #endif
#ifdef CONFIG_NO_HZ_FULL
unsigned long tick_dep_mask;
#endif
unsigned long nvcsw, nivcsw; /* context switch counts */ unsigned long nvcsw, nivcsw; /* context switch counts */
u64 start_time; /* monotonic time in nsec */ u64 start_time; /* monotonic time in nsec */
u64 real_start_time; /* boot based time in nsec */ u64 real_start_time; /* boot based time in nsec */
...@@ -2356,10 +2364,7 @@ static inline void wake_up_nohz_cpu(int cpu) { } ...@@ -2356,10 +2364,7 @@ static inline void wake_up_nohz_cpu(int cpu) { }
#endif #endif
#ifdef CONFIG_NO_HZ_FULL #ifdef CONFIG_NO_HZ_FULL
extern bool sched_can_stop_tick(void);
extern u64 scheduler_tick_max_deferment(void); extern u64 scheduler_tick_max_deferment(void);
#else
static inline bool sched_can_stop_tick(void) { return false; }
#endif #endif
#ifdef CONFIG_SCHED_AUTOGROUP #ifdef CONFIG_SCHED_AUTOGROUP
......
...@@ -97,6 +97,19 @@ static inline void tick_broadcast_exit(void) ...@@ -97,6 +97,19 @@ static inline void tick_broadcast_exit(void)
tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT); tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
} }
/* Bit positions of the individual tick dependencies. */
enum tick_dep_bits {
	TICK_DEP_BIT_POSIX_TIMER	= 0,
	TICK_DEP_BIT_PERF_EVENTS	= 1,
	TICK_DEP_BIT_SCHED		= 2,
	TICK_DEP_BIT_CLOCK_UNSTABLE	= 3,
};

/* Single-bit masks built from enum tick_dep_bits; NONE is the empty mask. */
#define TICK_DEP_MASK_NONE		0
#define TICK_DEP_MASK_POSIX_TIMER	(1 << TICK_DEP_BIT_POSIX_TIMER)
#define TICK_DEP_MASK_PERF_EVENTS	(1 << TICK_DEP_BIT_PERF_EVENTS)
#define TICK_DEP_MASK_SCHED		(1 << TICK_DEP_BIT_SCHED)
#define TICK_DEP_MASK_CLOCK_UNSTABLE	(1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
#ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_NO_HZ_COMMON
extern int tick_nohz_enabled; extern int tick_nohz_enabled;
extern int tick_nohz_tick_stopped(void); extern int tick_nohz_tick_stopped(void);
...@@ -154,9 +167,73 @@ static inline int housekeeping_any_cpu(void) ...@@ -154,9 +167,73 @@ static inline int housekeeping_any_cpu(void)
return cpumask_any_and(housekeeping_mask, cpu_online_mask); return cpumask_any_and(housekeeping_mask, cpu_online_mask);
} }
extern void tick_nohz_full_kick(void); extern void tick_nohz_dep_set(enum tick_dep_bits bit);
extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
extern void tick_nohz_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit);
/*
 * Inline front-ends for the out-of-line tick_nohz_dep_[set,clear]*()
 * functions. They optimize the off-cases on top of static keys: the real
 * call is only made once the nohz full check (global, or for the given CPU)
 * has passed.
 */

/* Set a tick dependency bit through tick_nohz_dep_set(). */
static inline void tick_dep_set(enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_set(bit);
}

/* Clear a tick dependency bit through tick_nohz_dep_clear(). */
static inline void tick_dep_clear(enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_clear(bit);
}

/* Set a tick dependency bit for @cpu, provided @cpu is a nohz full CPU. */
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
{
	if (!tick_nohz_full_cpu(cpu))
		return;

	tick_nohz_dep_set_cpu(cpu, bit);
}

/* Clear a tick dependency bit for @cpu, provided @cpu is a nohz full CPU. */
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
	if (!tick_nohz_full_cpu(cpu))
		return;

	tick_nohz_dep_clear_cpu(cpu, bit);
}

/* Set a tick dependency bit for the task @tsk. */
static inline void tick_dep_set_task(struct task_struct *tsk,
				     enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_set_task(tsk, bit);
}

/* Clear a tick dependency bit for the task @tsk. */
static inline void tick_dep_clear_task(struct task_struct *tsk,
				       enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_clear_task(tsk, bit);
}

/* Set a tick dependency bit for the signal struct @signal. */
static inline void tick_dep_set_signal(struct signal_struct *signal,
				       enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_set_signal(signal, bit);
}

/* Clear a tick dependency bit for the signal struct @signal. */
static inline void tick_dep_clear_signal(struct signal_struct *signal,
					 enum tick_dep_bits bit)
{
	if (!tick_nohz_full_enabled())
		return;

	tick_nohz_dep_clear_signal(signal, bit);
}
extern void tick_nohz_full_kick_cpu(int cpu); extern void tick_nohz_full_kick_cpu(int cpu);
extern void tick_nohz_full_kick_all(void);
extern void __tick_nohz_task_switch(void); extern void __tick_nohz_task_switch(void);
#else #else
static inline int housekeeping_any_cpu(void) static inline int housekeeping_any_cpu(void)
...@@ -166,9 +243,21 @@ static inline int housekeeping_any_cpu(void) ...@@ -166,9 +243,21 @@ static inline int housekeeping_any_cpu(void)
static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_enabled(void) { return false; }
static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; }
static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
/*
 * No-op variants of the tick_dep_*() helpers for the #else branch of the
 * enclosing configuration conditional: callers need no #ifdef of their own.
 */
static inline void tick_dep_set(enum tick_dep_bits bit) { }
static inline void tick_dep_clear(enum tick_dep_bits bit) { }
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_set_signal(struct signal_struct *signal,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit) { }
static inline void tick_nohz_full_kick_cpu(int cpu) { } static inline void tick_nohz_full_kick_cpu(int cpu) { }
static inline void tick_nohz_full_kick(void) { }
static inline void tick_nohz_full_kick_all(void) { }
static inline void __tick_nohz_task_switch(void) { } static inline void __tick_nohz_task_switch(void) { }
#endif #endif
......
...@@ -328,23 +328,49 @@ TRACE_EVENT(itimer_expire, ...@@ -328,23 +328,49 @@ TRACE_EVENT(itimer_expire,
); );
#ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_NO_HZ_COMMON
/*
 * List of tick dependency names, written once and expanded twice below with
 * different definitions of tick_dep_name()/tick_dep_name_end(). The _end
 * variant marks the last entry so the second expansion can omit the trailing
 * comma.
 */
#define TICK_DEP_NAMES \
tick_dep_name(NONE) \
tick_dep_name(POSIX_TIMER) \
tick_dep_name(PERF_EVENTS) \
tick_dep_name(SCHED) \
tick_dep_name_end(CLOCK_UNSTABLE)
#undef tick_dep_name
#undef tick_dep_name_end
/* First expansion: pass every TICK_DEP_MASK_* value to TRACE_DEFINE_ENUM(). */
#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
#define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
TICK_DEP_NAMES
#undef tick_dep_name
#undef tick_dep_name_end
/*
 * Second definition: each entry becomes a { mask, "NAME" } pair, so that
 * TICK_DEP_NAMES expands to the table handed to __print_symbolic().
 */
#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
/* Map a dependency mask value to its name when printing the trace event. */
#define show_tick_dep_name(val) \
__print_symbolic(val, TICK_DEP_NAMES)
TRACE_EVENT(tick_stop, TRACE_EVENT(tick_stop,
TP_PROTO(int success, char *error_msg), TP_PROTO(int success, int dependency),
TP_ARGS(success, error_msg), TP_ARGS(success, dependency),
TP_STRUCT__entry( TP_STRUCT__entry(
__field( int , success ) __field( int , success )
__string( msg, error_msg ) __field( int , dependency )
), ),
TP_fast_assign( TP_fast_assign(
__entry->success = success; __entry->success = success;
__assign_str(msg, error_msg); __entry->dependency = dependency;
), ),
TP_printk("success=%s msg=%s", __entry->success ? "yes" : "no", __get_str(msg)) TP_printk("success=%d dependency=%s", __entry->success, \
show_tick_dep_name(__entry->dependency))
); );
#endif #endif
......
...@@ -3112,17 +3112,6 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx) ...@@ -3112,17 +3112,6 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx)
return rotate; return rotate;
} }
#ifdef CONFIG_NO_HZ_FULL
/*
 * Report whether perf allows the tick to be stopped on this CPU: it does
 * not while nr_freq_events is non-zero or while this CPU's
 * perf_throttled_count is non-zero.
 */
bool perf_event_can_stop_tick(void)
{
	return !(atomic_read(&nr_freq_events) ||
		 __this_cpu_read(perf_throttled_count));
}
#endif
void perf_event_task_tick(void) void perf_event_task_tick(void)
{ {
struct list_head *head = this_cpu_ptr(&active_ctx_list); struct list_head *head = this_cpu_ptr(&active_ctx_list);
...@@ -3133,6 +3122,7 @@ void perf_event_task_tick(void) ...@@ -3133,6 +3122,7 @@ void perf_event_task_tick(void)
__this_cpu_inc(perf_throttled_seq); __this_cpu_inc(perf_throttled_seq);
throttled = __this_cpu_xchg(perf_throttled_count, 0); throttled = __this_cpu_xchg(perf_throttled_count, 0);
tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
list_for_each_entry_safe(ctx, tmp, head, active_ctx_list) list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
perf_adjust_freq_unthr_context(ctx, throttled); perf_adjust_freq_unthr_context(ctx, throttled);
...@@ -3564,6 +3554,28 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu) ...@@ -3564,6 +3554,28 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
atomic_dec(&per_cpu(perf_cgroup_events, cpu)); atomic_dec(&per_cpu(perf_cgroup_events, cpu));
} }
#ifdef CONFIG_NO_HZ_FULL
/* Serializes the nr_freq_events 0 <-> 1 transitions with the tick dependency. */
static DEFINE_SPINLOCK(nr_freq_lock);
#endif

/*
 * Drop one freq event under nr_freq_lock; when the count reaches zero the
 * TICK_DEP_BIT_PERF_EVENTS dependency is cleared. Compiles to an empty
 * function without CONFIG_NO_HZ_FULL.
 */
static void unaccount_freq_event_nohz(void)
{
#ifdef CONFIG_NO_HZ_FULL
	bool last;

	spin_lock(&nr_freq_lock);
	last = atomic_dec_and_test(&nr_freq_events);
	if (last)
		tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
	spin_unlock(&nr_freq_lock);
#endif
}
/*
 * Drop one freq event. Without nohz full a plain atomic decrement is
 * enough; with it, the nohz variant also maintains the tick dependency.
 */
static void unaccount_freq_event(void)
{
	if (!tick_nohz_full_enabled()) {
		atomic_dec(&nr_freq_events);
		return;
	}

	unaccount_freq_event_nohz();
}
static void unaccount_event(struct perf_event *event) static void unaccount_event(struct perf_event *event)
{ {
bool dec = false; bool dec = false;
...@@ -3580,7 +3592,7 @@ static void unaccount_event(struct perf_event *event) ...@@ -3580,7 +3592,7 @@ static void unaccount_event(struct perf_event *event)
if (event->attr.task) if (event->attr.task)
atomic_dec(&nr_task_events); atomic_dec(&nr_task_events);
if (event->attr.freq) if (event->attr.freq)
atomic_dec(&nr_freq_events); unaccount_freq_event();
if (event->attr.context_switch) { if (event->attr.context_switch) {
dec = true; dec = true;
atomic_dec(&nr_switch_events); atomic_dec(&nr_switch_events);
...@@ -6424,9 +6436,9 @@ static int __perf_event_overflow(struct perf_event *event, ...@@ -6424,9 +6436,9 @@ static int __perf_event_overflow(struct perf_event *event,
if (unlikely(throttle if (unlikely(throttle
&& hwc->interrupts >= max_samples_per_tick)) { && hwc->interrupts >= max_samples_per_tick)) {
__this_cpu_inc(perf_throttled_count); __this_cpu_inc(perf_throttled_count);
tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
hwc->interrupts = MAX_INTERRUPTS; hwc->interrupts = MAX_INTERRUPTS;
perf_log_throttle(event, 0); perf_log_throttle(event, 0);
tick_nohz_full_kick();
ret = 1; ret = 1;
} }
} }
...@@ -7816,6 +7828,27 @@ static void account_event_cpu(struct perf_event *event, int cpu) ...@@ -7816,6 +7828,27 @@ static void account_event_cpu(struct perf_event *event, int cpu)
atomic_inc(&per_cpu(perf_cgroup_events, cpu)); atomic_inc(&per_cpu(perf_cgroup_events, cpu));
} }
/*
 * Freq events need the tick to stay alive (see perf_event_task_tick).
 * The first accounted freq event sets TICK_DEP_BIT_PERF_EVENTS. Compiles to
 * an empty function without CONFIG_NO_HZ_FULL.
 */
static void account_freq_event_nohz(void)
{
#ifdef CONFIG_NO_HZ_FULL
	int nr;

	/* Hold the lock so a concurrent unaccount cannot race with us */
	spin_lock(&nr_freq_lock);
	nr = atomic_inc_return(&nr_freq_events);
	if (nr == 1)
		tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
	spin_unlock(&nr_freq_lock);
#endif
}
/*
 * Account one freq event. Without nohz full a plain atomic increment is
 * enough; with it, the nohz variant also maintains the tick dependency.
 */
static void account_freq_event(void)
{
	if (!tick_nohz_full_enabled()) {
		atomic_inc(&nr_freq_events);
		return;
	}

	account_freq_event_nohz();
}
static void account_event(struct perf_event *event) static void account_event(struct perf_event *event)
{ {
bool inc = false; bool inc = false;
...@@ -7831,10 +7864,8 @@ static void account_event(struct perf_event *event) ...@@ -7831,10 +7864,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_comm_events); atomic_inc(&nr_comm_events);
if (event->attr.task) if (event->attr.task)
atomic_inc(&nr_task_events); atomic_inc(&nr_task_events);
if (event->attr.freq) { if (event->attr.freq)
if (atomic_inc_return(&nr_freq_events) == 1) account_freq_event();
tick_nohz_full_kick_all();
}
if (event->attr.context_switch) { if (event->attr.context_switch) {
atomic_inc(&nr_switch_events); atomic_inc(&nr_switch_events);
inc = true; inc = true;
......
...@@ -61,6 +61,7 @@ ...@@ -61,6 +61,7 @@
#include <linux/static_key.h> #include <linux/static_key.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/tick.h>
/* /*
* Scheduler clock - returns current time in nanosec units. * Scheduler clock - returns current time in nanosec units.
...@@ -89,6 +90,8 @@ static void __set_sched_clock_stable(void) ...@@ -89,6 +90,8 @@ static void __set_sched_clock_stable(void)
{ {
if (!sched_clock_stable()) if (!sched_clock_stable())
static_key_slow_inc(&__sched_clock_stable); static_key_slow_inc(&__sched_clock_stable);
tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
} }
void set_sched_clock_stable(void) void set_sched_clock_stable(void)
...@@ -108,6 +111,8 @@ static void __clear_sched_clock_stable(struct work_struct *work) ...@@ -108,6 +111,8 @@ static void __clear_sched_clock_stable(struct work_struct *work)
/* XXX worry about clock continuity */ /* XXX worry about clock continuity */
if (sched_clock_stable()) if (sched_clock_stable())
static_key_slow_dec(&__sched_clock_stable); static_key_slow_dec(&__sched_clock_stable);
tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
} }
static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable); static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);
......
...@@ -453,20 +453,6 @@ static inline void init_hrtick(void) ...@@ -453,20 +453,6 @@ static inline void init_hrtick(void)
} }
#endif /* CONFIG_SCHED_HRTICK */ #endif /* CONFIG_SCHED_HRTICK */
/*
 * cmpxchg based fetch_or, macro so it works for different integer types.
 *
 * ORs @val into *@ptr and yields the previous value of *@ptr. Both arguments
 * are evaluated exactly once, so expressions with side effects are safe.
 */
#define fetch_or(ptr, val)						\
({									\
	typeof(ptr) _ptr = (ptr);					\
	typeof(val) _mask = (val);					\
	typeof(*_ptr) _old, _cur = *_ptr;				\
									\
	/* Retry until no other writer changed *_ptr under us. */	\
	for (;;) {							\
		_old = cmpxchg(_ptr, _cur, _cur | _mask);		\
		if (_old == _cur)					\
			break;						\
		_cur = _old;						\
	}								\
	_old;								\
})
#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG) #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
/* /*
* Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG, * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
...@@ -715,31 +701,36 @@ static inline bool got_nohz_idle_kick(void) ...@@ -715,31 +701,36 @@ static inline bool got_nohz_idle_kick(void)
#endif /* CONFIG_NO_HZ_COMMON */ #endif /* CONFIG_NO_HZ_COMMON */
#ifdef CONFIG_NO_HZ_FULL #ifdef CONFIG_NO_HZ_FULL
bool sched_can_stop_tick(void) bool sched_can_stop_tick(struct rq *rq)
{ {
int fifo_nr_running;
/* Deadline tasks, even if single, need the tick */
if (rq->dl.dl_nr_running)
return false;
/* /*
* FIFO realtime policy runs the highest priority task. Other runnable * FIFO realtime policy runs the highest priority task (after DEADLINE).
* tasks are of a lower priority. The scheduler tick does nothing. * Other runnable tasks are of a lower priority. The scheduler tick
* isn't needed.
*/ */
if (current->policy == SCHED_FIFO) fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
if (fifo_nr_running)
return true; return true;
/* /*
* Round-robin realtime tasks time slice with other tasks at the same * Round-robin realtime tasks time slice with other tasks at the same
* realtime priority. Is this task the only one at this priority? * realtime priority.
*/ */
if (current->policy == SCHED_RR) { if (rq->rt.rr_nr_running) {
struct sched_rt_entity *rt_se = &current->rt; if (rq->rt.rr_nr_running == 1)
return true;
return list_is_singular(&rt_se->run_list); else
return false;
} }
/* /* Normal multitasking need periodic preemption checks */
* More than one running task need preemption. if (rq->cfs.nr_running > 1)
* nr_running update is assumed to be visible
* after IPI is sent from wakers.
*/
if (this_rq()->nr_running > 1)
return false; return false;
return true; return true;
......
...@@ -1141,6 +1141,20 @@ unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se) ...@@ -1141,6 +1141,20 @@ unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
return 1; return 1;
} }
static inline
unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
{
struct rt_rq *group_rq = group_rt_rq(rt_se);
struct task_struct *tsk;
if (group_rq)
return group_rq->rr_nr_running;
tsk = rt_task_of(rt_se);
return (tsk->policy == SCHED_RR) ? 1 : 0;
}
static inline static inline
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{ {
...@@ -1148,6 +1162,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) ...@@ -1148,6 +1162,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
WARN_ON(!rt_prio(prio)); WARN_ON(!rt_prio(prio));
rt_rq->rt_nr_running += rt_se_nr_running(rt_se); rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
inc_rt_prio(rt_rq, prio); inc_rt_prio(rt_rq, prio);
inc_rt_migration(rt_se, rt_rq); inc_rt_migration(rt_se, rt_rq);
...@@ -1160,6 +1175,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) ...@@ -1160,6 +1175,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
WARN_ON(!rt_prio(rt_se_prio(rt_se))); WARN_ON(!rt_prio(rt_se_prio(rt_se)));
WARN_ON(!rt_rq->rt_nr_running); WARN_ON(!rt_rq->rt_nr_running);
rt_rq->rt_nr_running -= rt_se_nr_running(rt_se); rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
dec_rt_prio(rt_rq, rt_se_prio(rt_se)); dec_rt_prio(rt_rq, rt_se_prio(rt_se));
dec_rt_migration(rt_se, rt_rq); dec_rt_migration(rt_se, rt_rq);
......
...@@ -450,6 +450,7 @@ static inline int rt_bandwidth_enabled(void) ...@@ -450,6 +450,7 @@ static inline int rt_bandwidth_enabled(void)
struct rt_rq { struct rt_rq {
struct rt_prio_array active; struct rt_prio_array active;
unsigned int rt_nr_running; unsigned int rt_nr_running;
unsigned int rr_nr_running;
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
struct { struct {
int curr; /* highest queued rt task prio */ int curr; /* highest queued rt task prio */
...@@ -1278,6 +1279,35 @@ unsigned long to_ratio(u64 period, u64 runtime); ...@@ -1278,6 +1279,35 @@ unsigned long to_ratio(u64 period, u64 runtime);
extern void init_entity_runnable_average(struct sched_entity *se); extern void init_entity_runnable_average(struct sched_entity *se);
#ifdef CONFIG_NO_HZ_FULL
extern bool sched_can_stop_tick(struct rq *rq);

/*
 * Tasks in the runqueue may need the tick, depending on their policy and
 * requirements. Keep the TICK_DEP_BIT_SCHED dependency of @rq's CPU in sync
 * with sched_can_stop_tick(): set it while the tick is needed (so that the
 * target gets kicked out of nohz mode if necessary), clear it otherwise.
 * Nothing happens unless nohz full is enabled and the CPU is a nohz full CPU.
 */
static inline void sched_update_tick_dependency(struct rq *rq)
{
	int cpu;

	if (tick_nohz_full_enabled()) {
		cpu = cpu_of(rq);

		if (tick_nohz_full_cpu(cpu)) {
			if (!sched_can_stop_tick(rq))
				tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
			else
				tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
		}
	}
}
#else
static inline void sched_update_tick_dependency(struct rq *rq) { }
#endif
static inline void add_nr_running(struct rq *rq, unsigned count) static inline void add_nr_running(struct rq *rq, unsigned count)
{ {
unsigned prev_nr = rq->nr_running; unsigned prev_nr = rq->nr_running;
...@@ -1289,26 +1319,16 @@ static inline void add_nr_running(struct rq *rq, unsigned count) ...@@ -1289,26 +1319,16 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
if (!rq->rd->overload) if (!rq->rd->overload)
rq->rd->overload = true; rq->rd->overload = true;
#endif #endif
#ifdef CONFIG_NO_HZ_FULL
if (tick_nohz_full_cpu(rq->cpu)) {
/*
* Tick is needed if more than one task runs on a CPU.
* Send the target an IPI to kick it out of nohz mode.
*
* We assume that IPI implies full memory barrier and the
* new value of rq->nr_running is visible on reception
* from the target.
*/
tick_nohz_full_kick_cpu(rq->cpu);
}
#endif
} }
sched_update_tick_dependency(rq);
} }
static inline void sub_nr_running(struct rq *rq, unsigned count) static inline void sub_nr_running(struct rq *rq, unsigned count)
{ {
rq->nr_running -= count; rq->nr_running -= count;
/* Check if we still need preemption */
sched_update_tick_dependency(rq);
} }
static inline void rq_last_tick_reset(struct rq *rq) static inline void rq_last_tick_reset(struct rq *rq)
......
...@@ -333,7 +333,6 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp) ...@@ -333,7 +333,6 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
return err; return err;
} }
/* /*
* Validate the clockid_t for a new CPU-clock timer, and initialize the timer. * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
* This is called from sys_timer_create() and do_cpu_nanosleep() with the * This is called from sys_timer_create() and do_cpu_nanosleep() with the
...@@ -517,6 +516,10 @@ static void arm_timer(struct k_itimer *timer) ...@@ -517,6 +516,10 @@ static void arm_timer(struct k_itimer *timer)
cputime_expires->sched_exp = exp; cputime_expires->sched_exp = exp;
break; break;
} }
if (CPUCLOCK_PERTHREAD(timer->it_clock))
tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
else
tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
} }
} }
...@@ -582,39 +585,6 @@ static int cpu_timer_sample_group(const clockid_t which_clock, ...@@ -582,39 +585,6 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
return 0; return 0;
} }
#ifdef CONFIG_NO_HZ_FULL
/* Work handler: kick all full dynticks CPUs via tick_nohz_full_kick_all(). */
static void nohz_kick_work_fn(struct work_struct *work)
{
tick_nohz_full_kick_all();
}
/* Work item bound to nohz_kick_work_fn(); queued by posix_cpu_timer_kick_nohz(). */
static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
/*
 * The posix cpu timers are always set with irqs disabled, while the IPIs
 * have to be sent from sane process context. So defer the kick to
 * nohz_kick_work, and only when context tracking is enabled.
 */
static void posix_cpu_timer_kick_nohz(void)
{
	if (!context_tracking_is_enabled())
		return;

	schedule_work(&nohz_kick_work);
}
/*
 * The tick can be stopped for @tsk only if it has no pending cputime
 * expiry and its signal struct's cputimer is not running.
 */
bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
{
	/* cputimer.running is read without locking, hence READ_ONCE(). */
	return task_cputime_zero(&tsk->cputime_expires) &&
	       !READ_ONCE(tsk->signal->cputimer.running);
}
#else
static inline void posix_cpu_timer_kick_nohz(void) { }
#endif
/* /*
* Guts of sys_timer_settime for CPU timers. * Guts of sys_timer_settime for CPU timers.
* This is called with the timer locked and interrupts disabled. * This is called with the timer locked and interrupts disabled.
...@@ -761,8 +731,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, ...@@ -761,8 +731,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
sample_to_timespec(timer->it_clock, sample_to_timespec(timer->it_clock,
old_incr, &old->it_interval); old_incr, &old->it_interval);
} }
if (!ret)
posix_cpu_timer_kick_nohz();
return ret; return ret;
} }
...@@ -911,6 +880,8 @@ static void check_thread_timers(struct task_struct *tsk, ...@@ -911,6 +880,8 @@ static void check_thread_timers(struct task_struct *tsk,
__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
} }
} }
if (task_cputime_zero(tsk_expires))
tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
} }
static inline void stop_process_timers(struct signal_struct *sig) static inline void stop_process_timers(struct signal_struct *sig)
...@@ -919,6 +890,7 @@ static inline void stop_process_timers(struct signal_struct *sig) ...@@ -919,6 +890,7 @@ static inline void stop_process_timers(struct signal_struct *sig)
/* Turn off cputimer->running. This is done without locking. */ /* Turn off cputimer->running. This is done without locking. */
WRITE_ONCE(cputimer->running, false); WRITE_ONCE(cputimer->running, false);
tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
} }
static u32 onecputick; static u32 onecputick;
...@@ -1095,8 +1067,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) ...@@ -1095,8 +1067,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
arm_timer(timer); arm_timer(timer);
unlock_task_sighand(p, &flags); unlock_task_sighand(p, &flags);
/* Kick full dynticks CPUs in case they need to tick on the new timer */
posix_cpu_timer_kick_nohz();
out: out:
timer->it_overrun_last = timer->it_overrun; timer->it_overrun_last = timer->it_overrun;
timer->it_overrun = -1; timer->it_overrun = -1;
...@@ -1270,7 +1240,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, ...@@ -1270,7 +1240,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
} }
if (!*newval) if (!*newval)
goto out; return;
*newval += now; *newval += now;
} }
...@@ -1288,8 +1258,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, ...@@ -1288,8 +1258,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
tsk->signal->cputime_expires.virt_exp = *newval; tsk->signal->cputime_expires.virt_exp = *newval;
break; break;
} }
out:
posix_cpu_timer_kick_nohz(); tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
} }
static int do_cpu_nanosleep(const clockid_t which_clock, int flags, static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
......
...@@ -22,7 +22,6 @@ ...@@ -22,7 +22,6 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/irq_work.h> #include <linux/irq_work.h>
#include <linux/posix-timers.h> #include <linux/posix-timers.h>
#include <linux/perf_event.h>
#include <linux/context_tracking.h> #include <linux/context_tracking.h>
#include <asm/irq_regs.h> #include <asm/irq_regs.h>
...@@ -158,54 +157,63 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs) ...@@ -158,54 +157,63 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
cpumask_var_t tick_nohz_full_mask; cpumask_var_t tick_nohz_full_mask;
cpumask_var_t housekeeping_mask; cpumask_var_t housekeeping_mask;
bool tick_nohz_full_running; bool tick_nohz_full_running;
static unsigned long tick_dep_mask;
static bool can_stop_full_tick(void) static void trace_tick_dependency(unsigned long dep)
{
if (dep & TICK_DEP_MASK_POSIX_TIMER) {
trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
return;
}
if (dep & TICK_DEP_MASK_PERF_EVENTS) {
trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
return;
}
if (dep & TICK_DEP_MASK_SCHED) {
trace_tick_stop(0, TICK_DEP_MASK_SCHED);
return;
}
if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE)
trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
}
static bool can_stop_full_tick(struct tick_sched *ts)
{ {
WARN_ON_ONCE(!irqs_disabled()); WARN_ON_ONCE(!irqs_disabled());
if (!sched_can_stop_tick()) { if (tick_dep_mask) {
trace_tick_stop(0, "more than 1 task in runqueue\n"); trace_tick_dependency(tick_dep_mask);
return false; return false;
} }
if (!posix_cpu_timers_can_stop_tick(current)) { if (ts->tick_dep_mask) {
trace_tick_stop(0, "posix timers running\n"); trace_tick_dependency(ts->tick_dep_mask);
return false; return false;
} }
if (!perf_event_can_stop_tick()) { if (current->tick_dep_mask) {
trace_tick_stop(0, "perf events running\n"); trace_tick_dependency(current->tick_dep_mask);
return false; return false;
} }
/* sched_clock_tick() needs us? */ if (current->signal->tick_dep_mask) {
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK trace_tick_dependency(current->signal->tick_dep_mask);
/*
* TODO: kick full dynticks CPUs when
* sched_clock_stable is set.
*/
if (!sched_clock_stable()) {
trace_tick_stop(0, "unstable sched clock\n");
/*
* Don't allow the user to think they can get
* full NO_HZ with this machine.
*/
WARN_ONCE(tick_nohz_full_running,
"NO_HZ FULL will not work with unstable sched clock");
return false; return false;
} }
#endif
return true; return true;
} }
/*
 * irq_work callback installed as .func of the per-CPU nohz_full_kick_work
 * below (renamed from nohz_full_kick_work_func to nohz_full_kick_func). The
 * body is deliberately empty. tick_nohz_full_kick_cpu() queues this work on a
 * target CPU with irq_work_queue_on().
 */
static void nohz_full_kick_work_func(struct irq_work *work) static void nohz_full_kick_func(struct irq_work *work)
{ {
/* Empty, the tick restart happens on tick_nohz_irq_exit() */ /* Empty, the tick restart happens on tick_nohz_irq_exit() */
} }
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
.func = nohz_full_kick_work_func, .func = nohz_full_kick_func,
}; };
/* /*
...@@ -214,7 +222,7 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { ...@@ -214,7 +222,7 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
* This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(), * This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
* is NMI safe. * is NMI safe.
*/ */
void tick_nohz_full_kick(void) static void tick_nohz_full_kick(void)
{ {
if (!tick_nohz_full_cpu(smp_processor_id())) if (!tick_nohz_full_cpu(smp_processor_id()))
return; return;
...@@ -234,27 +242,112 @@ void tick_nohz_full_kick_cpu(int cpu) ...@@ -234,27 +242,112 @@ void tick_nohz_full_kick_cpu(int cpu)
irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu); irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
} }
/*
 * Callback handed to smp_call_function_many() by the old (left-hand) version
 * of tick_nohz_full_kick_all(). @info is unused and the body is deliberately
 * empty.
 */
static void nohz_full_kick_ipi(void *info)
{
/* Empty, the tick restart happens on tick_nohz_irq_exit() */
}
/* /*
* Kick all full dynticks CPUs in order to force these to re-evaluate * Kick all full dynticks CPUs in order to force these to re-evaluate
* their dependency on the tick and restart it if necessary. * their dependency on the tick and restart it if necessary.
*/ */
void tick_nohz_full_kick_all(void) static void tick_nohz_full_kick_all(void)
{ {
int cpu;
/* Nothing to do unless tick_nohz_full_running is set. */
if (!tick_nohz_full_running) if (!tick_nohz_full_running)
return; return;
/*
 * New (right-hand) version: with preemption disabled, call
 * tick_nohz_full_kick_cpu() for every CPU that is both in
 * tick_nohz_full_mask and in cpu_online_mask.
 * Old (left-hand) version: run nohz_full_kick_ipi on tick_nohz_full_mask
 * through smp_call_function_many(), then call tick_nohz_full_kick().
 */
preempt_disable(); preempt_disable();
smp_call_function_many(tick_nohz_full_mask, for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
nohz_full_kick_ipi, NULL, false); tick_nohz_full_kick_cpu(cpu);
tick_nohz_full_kick();
preempt_enable(); preempt_enable();
} }
/*
 * OR @bit into the dependency mask at @dep. Only the transition from an
 * empty mask to a non-empty one triggers a kick of all full dynticks CPUs
 * through tick_nohz_full_kick_all().
 */
static void tick_nohz_dep_set_all(unsigned long *dep,
				  enum tick_dep_bits bit)
{
	unsigned long old_mask = fetch_or(dep, BIT_MASK(bit));

	/* The mask already held at least one bit: no kick. */
	if (old_mask)
		return;

	tick_nohz_full_kick_all();
}
/*
 * Raise @bit in the global tick dependency mask. Users are perf events
 * that rely on freq, and the unstable clock.
 */
void tick_nohz_dep_set(enum tick_dep_bits bit)
{
	unsigned long *global_mask = &tick_dep_mask;

	tick_nohz_dep_set_all(global_mask, bit);
}
/* Drop @bit from the global tick dependency mask; no CPU is kicked here. */
void tick_nohz_dep_clear(enum tick_dep_bits bit)
{
	unsigned long *global_mask = &tick_dep_mask;

	clear_bit(bit, global_mask);
}
/*
 * Raise @bit in the tick dependency mask of @cpu. The scheduler and perf
 * events use this in order to manage events throttling.
 *
 * The target is kicked only when its mask was empty beforehand.
 */
void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
{
	struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
	unsigned long old_mask = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit));

	if (old_mask)
		return;

	preempt_disable();
	if (cpu != smp_processor_id()) {
		/* Remote irq work not NMI-safe */
		if (!WARN_ON_ONCE(in_nmi()))
			tick_nohz_full_kick_cpu(cpu);
	} else {
		/* Perf needs local kick that is NMI safe */
		tick_nohz_full_kick();
	}
	preempt_enable();
}
void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
clear_bit(bit, &ts->tick_dep_mask);
}
/*
 * Raise @bit in the tick dependency mask of @tsk. Posix CPU timers need
 * this in order to elapse per task timers.
 */
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
	unsigned long *task_mask = &tsk->tick_dep_mask;

	/*
	 * Possible optimization: kick only the target running the task,
	 * if that noise matters for nohz full users.
	 */
	tick_nohz_dep_set_all(task_mask, bit);
}
/* Drop @bit from the tick dependency mask of @tsk; no CPU is kicked here. */
void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
	unsigned long *task_mask = &tsk->tick_dep_mask;

	clear_bit(bit, task_mask);
}
/*
 * Raise @bit in the per-taskgroup tick dependency mask held in @sig. Posix
 * CPU timers need this in order to elapse per process timers.
 */
void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
{
	unsigned long *group_mask = &sig->tick_dep_mask;

	tick_nohz_dep_set_all(group_mask, bit);
}
/* Drop @bit from the tick dependency mask held in @sig; no CPU is kicked here. */
void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
{
	unsigned long *group_mask = &sig->tick_dep_mask;

	clear_bit(bit, group_mask);
}
/* /*
* Re-evaluate the need for the tick as we switch the current task. * Re-evaluate the need for the tick as we switch the current task.
* It might need the tick due to per task/process properties: * It might need the tick due to per task/process properties:
...@@ -263,15 +356,19 @@ void tick_nohz_full_kick_all(void) ...@@ -263,15 +356,19 @@ void tick_nohz_full_kick_all(void)
void __tick_nohz_task_switch(void) void __tick_nohz_task_switch(void)
{ {
unsigned long flags; unsigned long flags;
struct tick_sched *ts;
/* Interrupts stay disabled for the whole check; restored at "out". */
local_irq_save(flags); local_irq_save(flags);
/* Nothing to re-evaluate when this CPU is not a full dynticks CPU. */
if (!tick_nohz_full_cpu(smp_processor_id())) if (!tick_nohz_full_cpu(smp_processor_id()))
goto out; goto out;
if (tick_nohz_tick_stopped() && !can_stop_full_tick()) ts = this_cpu_ptr(&tick_cpu_sched);
tick_nohz_full_kick();
/*
 * New (right-hand) version: when this CPU's tick is stopped, call
 * tick_nohz_full_kick() if either current->tick_dep_mask or
 * current->signal->tick_dep_mask is nonzero. The global and per-CPU
 * masks are not consulted here.
 */
if (ts->tick_stopped) {
if (current->tick_dep_mask || current->signal->tick_dep_mask)
tick_nohz_full_kick();
}
out: out:
local_irq_restore(flags); local_irq_restore(flags);
} }
...@@ -689,7 +786,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, ...@@ -689,7 +786,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
ts->last_tick = hrtimer_get_expires(&ts->sched_timer); ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
ts->tick_stopped = 1; ts->tick_stopped = 1;
trace_tick_stop(1, " "); trace_tick_stop(1, TICK_DEP_MASK_NONE);
} }
/* /*
...@@ -740,7 +837,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) ...@@ -740,7 +837,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE) if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
return; return;
if (can_stop_full_tick()) if (can_stop_full_tick(ts))
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
else if (ts->tick_stopped) else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, ktime_get(), 1); tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
......
...@@ -60,6 +60,7 @@ struct tick_sched { ...@@ -60,6 +60,7 @@ struct tick_sched {
u64 next_timer; u64 next_timer;
ktime_t idle_expires; ktime_t idle_expires;
int do_timer_last; int do_timer_last;
unsigned long tick_dep_mask;
}; };
extern struct tick_sched *tick_get_tick_sched(int cpu); extern struct tick_sched *tick_get_tick_sched(int cpu);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment