Commit e23604ed authored by Linus Torvalds

Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull NOHZ updates from Ingo Molnar:
 "NOHZ enhancements, by Frederic Weisbecker, which reorganizes/refactors
  the NOHZ 'can the tick be stopped?' infrastructure and related code to
  be data driven, and harmonizes the naming and handling of all the
  various properties"

[ This makes the ugly "fetch_or()" macro that the scheduler used
  internally a new generic helper, and does a bad job at it.

  I'm pulling it, but I've asked Ingo and Frederic to get this
  fixed up ]

* 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched-clock: Migrate to use new tick dependency mask model
  posix-cpu-timers: Migrate to use new tick dependency mask model
  sched: Migrate sched to use new tick dependency mask model
  sched: Account rr tasks
  perf: Migrate perf to use new tick dependency mask model
  nohz: Use enum code for tick stop failure tracing message
  nohz: New tick dependency mask
  nohz: Implement wide kick on top of irq work
  atomic: Export fetch_or()
parents d4e79615 1f251846
include/linux/atomic.h

@@ -548,6 +548,27 @@ static inline int atomic_dec_if_positive(atomic_t *v)
}
#endif

/**
* fetch_or - perform *ptr |= mask and return old value of *ptr
* @ptr: pointer to value
* @mask: mask to OR on the value
*
* cmpxchg based fetch_or, macro so it works for different integer types
*/
#ifndef fetch_or
#define fetch_or(ptr, mask) \
({ typeof(*(ptr)) __old, __val = *(ptr); \
for (;;) { \
__old = cmpxchg((ptr), __val, __val | (mask)); \
if (__old == __val) \
break; \
__val = __old; \
} \
__old; \
})
#endif
#ifdef CONFIG_GENERIC_ATOMIC64
#include <asm-generic/atomic64.h>
#endif
...
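As an aside, a minimal sketch of how a caller might use the fetch_or() helper exported above (illustrative only, not part of this diff; the function and message below are made up): fetch_or() ORs the mask into the word and returns the word's previous value, so the caller can tell whether it performed the transition itself.

#include <linux/atomic.h>
#include <linux/printk.h>

/* Hypothetical caller: set bit 1 and detect whether it was already set. */
static void fetch_or_example(unsigned long *flags)
{
	unsigned long old = fetch_or(flags, 0x2UL);

	/* fetch_or() returned the previous value, so the old bit state is visible. */
	if (!(old & 0x2UL))
		pr_debug("bit 1 was clear before; this caller set it\n");
}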
include/linux/perf_event.h

@@ -1110,12 +1110,6 @@ static inline void perf_event_task_tick(void) { }
static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
#endif

-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
-extern bool perf_event_can_stop_tick(void);
-#else
-static inline bool perf_event_can_stop_tick(void) { return true; }
-#endif

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
extern void perf_restore_debug_store(void);
#else
...
include/linux/posix-timers.h

@@ -128,9 +128,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer);
void run_posix_cpu_timers(struct task_struct *task);
void posix_cpu_timers_exit(struct task_struct *task);
void posix_cpu_timers_exit_group(struct task_struct *task);
-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk);

void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
cputime_t *newval, cputime_t *oldval);
...
include/linux/sched.h

@@ -717,6 +717,10 @@ struct signal_struct {
/* Earliest-expiration cache. */
struct task_cputime cputime_expires;

#ifdef CONFIG_NO_HZ_FULL
unsigned long tick_dep_mask;
#endif
struct list_head cpu_timers[3];

struct pid *tty_old_pgrp;

@@ -1542,6 +1546,10 @@ struct task_struct {
VTIME_SYS,
} vtime_snap_whence;
#endif
#ifdef CONFIG_NO_HZ_FULL
unsigned long tick_dep_mask;
#endif
unsigned long nvcsw, nivcsw; /* context switch counts */

u64 start_time; /* monotonic time in nsec */
u64 real_start_time; /* boot based time in nsec */

@@ -2356,10 +2364,7 @@ static inline void wake_up_nohz_cpu(int cpu) { }
#endif

#ifdef CONFIG_NO_HZ_FULL
-extern bool sched_can_stop_tick(void);
extern u64 scheduler_tick_max_deferment(void);
-#else
-static inline bool sched_can_stop_tick(void) { return false; }
#endif

#ifdef CONFIG_SCHED_AUTOGROUP
...
include/linux/tick.h

@@ -97,6 +97,19 @@ static inline void tick_broadcast_exit(void)
tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
}

enum tick_dep_bits {
TICK_DEP_BIT_POSIX_TIMER = 0,
TICK_DEP_BIT_PERF_EVENTS = 1,
TICK_DEP_BIT_SCHED = 2,
TICK_DEP_BIT_CLOCK_UNSTABLE = 3
};
#define TICK_DEP_MASK_NONE 0
#define TICK_DEP_MASK_POSIX_TIMER (1 << TICK_DEP_BIT_POSIX_TIMER)
#define TICK_DEP_MASK_PERF_EVENTS (1 << TICK_DEP_BIT_PERF_EVENTS)
#define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED)
#define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
#ifdef CONFIG_NO_HZ_COMMON
extern int tick_nohz_enabled;
extern int tick_nohz_tick_stopped(void);

@@ -154,9 +167,73 @@ static inline int housekeeping_any_cpu(void)
return cpumask_any_and(housekeeping_mask, cpu_online_mask);
}

-extern void tick_nohz_full_kick(void);
extern void tick_nohz_dep_set(enum tick_dep_bits bit);
extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
extern void tick_nohz_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit);
/*
* The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
* on top of static keys.
*/
static inline void tick_dep_set(enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_set(bit);
}
static inline void tick_dep_clear(enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_clear(bit);
}
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
{
if (tick_nohz_full_cpu(cpu))
tick_nohz_dep_set_cpu(cpu, bit);
}
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
if (tick_nohz_full_cpu(cpu))
tick_nohz_dep_clear_cpu(cpu, bit);
}
static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_set_task(tsk, bit);
}
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_clear_task(tsk, bit);
}
static inline void tick_dep_set_signal(struct signal_struct *signal,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_set_signal(signal, bit);
}
static inline void tick_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_clear_signal(signal, bit);
}
extern void tick_nohz_full_kick_cpu(int cpu);
-extern void tick_nohz_full_kick_all(void);
extern void __tick_nohz_task_switch(void);
#else
static inline int housekeeping_any_cpu(void)

@@ -166,9 +243,21 @@ static inline int housekeeping_any_cpu(void)
static inline bool tick_nohz_full_enabled(void) { return false; }
static inline bool tick_nohz_full_cpu(int cpu) { return false; }
static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
static inline void tick_dep_set(enum tick_dep_bits bit) { }
static inline void tick_dep_clear(enum tick_dep_bits bit) { }
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_set_signal(struct signal_struct *signal,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit) { }
static inline void tick_nohz_full_kick_cpu(int cpu) { }
-static inline void tick_nohz_full_kick(void) { }
-static inline void tick_nohz_full_kick_all(void) { }
static inline void __tick_nohz_task_switch(void) { }
#endif
...
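To make the calling convention above concrete, here is a hypothetical user of the per-CPU wrappers (illustration only, not part of this series; a real subsystem would normally add its own TICK_DEP_BIT_* entry rather than reuse TICK_DEP_BIT_PERF_EVENTS as done here for brevity):

#include <linux/smp.h>
#include <linux/tick.h>

/* Hypothetical: keep the tick alive on the local CPU while polling work is pending. */
static void example_poll_start(void)
{
	int cpu = get_cpu();

	/* The wrapper is a no-op unless this CPU is part of the nohz_full set. */
	tick_dep_set_cpu(cpu, TICK_DEP_BIT_PERF_EVENTS);
	put_cpu();
}

static void example_poll_stop(void)
{
	int cpu = get_cpu();

	tick_dep_clear_cpu(cpu, TICK_DEP_BIT_PERF_EVENTS);
	put_cpu();
}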
include/trace/events/timer.h

@@ -328,23 +328,49 @@ TRACE_EVENT(itimer_expire,
);

#ifdef CONFIG_NO_HZ_COMMON
#define TICK_DEP_NAMES \
tick_dep_name(NONE) \
tick_dep_name(POSIX_TIMER) \
tick_dep_name(PERF_EVENTS) \
tick_dep_name(SCHED) \
tick_dep_name_end(CLOCK_UNSTABLE)
#undef tick_dep_name
#undef tick_dep_name_end
#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
#define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
TICK_DEP_NAMES
#undef tick_dep_name
#undef tick_dep_name_end
#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
#define show_tick_dep_name(val) \
__print_symbolic(val, TICK_DEP_NAMES)
TRACE_EVENT(tick_stop,

-TP_PROTO(int success, char *error_msg),
TP_PROTO(int success, int dependency),

-TP_ARGS(success, error_msg),
TP_ARGS(success, dependency),

TP_STRUCT__entry(
__field( int , success )
-__string( msg, error_msg )
__field( int , dependency )
),

TP_fast_assign(
__entry->success = success;
-__assign_str(msg, error_msg);
__entry->dependency = dependency;
),

-TP_printk("success=%s msg=%s", __entry->success ? "yes" : "no", __get_str(msg))
TP_printk("success=%d dependency=%s", __entry->success, \
show_tick_dep_name(__entry->dependency))
);
#endif
...
kernel/events/core.c

@@ -3112,17 +3112,6 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx)
return rotate;
}

-#ifdef CONFIG_NO_HZ_FULL
-bool perf_event_can_stop_tick(void)
-{
-if (atomic_read(&nr_freq_events) ||
-__this_cpu_read(perf_throttled_count))
-return false;
-else
-return true;
-}
-#endif

void perf_event_task_tick(void)
{
struct list_head *head = this_cpu_ptr(&active_ctx_list);

@@ -3133,6 +3122,7 @@ void perf_event_task_tick(void)
__this_cpu_inc(perf_throttled_seq);
throttled = __this_cpu_xchg(perf_throttled_count, 0);
tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
perf_adjust_freq_unthr_context(ctx, throttled);

@@ -3564,6 +3554,28 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
atomic_dec(&per_cpu(perf_cgroup_events, cpu));
}

#ifdef CONFIG_NO_HZ_FULL
static DEFINE_SPINLOCK(nr_freq_lock);
#endif
static void unaccount_freq_event_nohz(void)
{
#ifdef CONFIG_NO_HZ_FULL
spin_lock(&nr_freq_lock);
if (atomic_dec_and_test(&nr_freq_events))
tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
spin_unlock(&nr_freq_lock);
#endif
}
static void unaccount_freq_event(void)
{
if (tick_nohz_full_enabled())
unaccount_freq_event_nohz();
else
atomic_dec(&nr_freq_events);
}
static void unaccount_event(struct perf_event *event)
{
bool dec = false;

@@ -3580,7 +3592,7 @@ static void unaccount_event(struct perf_event *event)
if (event->attr.task)
atomic_dec(&nr_task_events);
if (event->attr.freq)
-atomic_dec(&nr_freq_events);
unaccount_freq_event();
if (event->attr.context_switch) {
dec = true;
atomic_dec(&nr_switch_events);
@@ -6424,9 +6436,9 @@ static int __perf_event_overflow(struct perf_event *event,
if (unlikely(throttle
&& hwc->interrupts >= max_samples_per_tick)) {
__this_cpu_inc(perf_throttled_count);
tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
hwc->interrupts = MAX_INTERRUPTS;
perf_log_throttle(event, 0);
-tick_nohz_full_kick();
ret = 1;
}
}
@@ -7815,6 +7827,27 @@ static void account_event_cpu(struct perf_event *event, int cpu)
atomic_inc(&per_cpu(perf_cgroup_events, cpu));
}

/* Freq events need the tick to stay alive (see perf_event_task_tick). */
static void account_freq_event_nohz(void)
{
#ifdef CONFIG_NO_HZ_FULL
/* Lock so we don't race with concurrent unaccount */
spin_lock(&nr_freq_lock);
if (atomic_inc_return(&nr_freq_events) == 1)
tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
spin_unlock(&nr_freq_lock);
#endif
}
static void account_freq_event(void)
{
if (tick_nohz_full_enabled())
account_freq_event_nohz();
else
atomic_inc(&nr_freq_events);
}
static void account_event(struct perf_event *event)
{
bool inc = false;

@@ -7830,10 +7863,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_comm_events);
if (event->attr.task)
atomic_inc(&nr_task_events);
-if (event->attr.freq) {
-if (atomic_inc_return(&nr_freq_events) == 1)
-tick_nohz_full_kick_all();
-}
if (event->attr.freq)
account_freq_event();
if (event->attr.context_switch) {
atomic_inc(&nr_switch_events);
inc = true;
...
kernel/sched/clock.c

@@ -61,6 +61,7 @@
#include <linux/static_key.h>
#include <linux/workqueue.h>
#include <linux/compiler.h>
#include <linux/tick.h>

/*
* Scheduler clock - returns current time in nanosec units.

@@ -89,6 +90,8 @@ static void __set_sched_clock_stable(void)
{
if (!sched_clock_stable())
static_key_slow_inc(&__sched_clock_stable);

tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
}

void set_sched_clock_stable(void)

@@ -108,6 +111,8 @@ static void __clear_sched_clock_stable(struct work_struct *work)
/* XXX worry about clock continuity */
if (sched_clock_stable())
static_key_slow_dec(&__sched_clock_stable);

tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
}

static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);
...
kernel/sched/core.c

@@ -320,20 +320,6 @@ static inline void init_hrtick(void)
}
#endif /* CONFIG_SCHED_HRTICK */

-/*
- * cmpxchg based fetch_or, macro so it works for different integer types
- */
-#define fetch_or(ptr, val) \
-({ typeof(*(ptr)) __old, __val = *(ptr); \
-for (;;) { \
-__old = cmpxchg((ptr), __val, __val | (val)); \
-if (__old == __val) \
-break; \
-__val = __old; \
-} \
-__old; \
-})

#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
/*
* Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,

@@ -582,31 +568,36 @@ static inline bool got_nohz_idle_kick(void)
#endif /* CONFIG_NO_HZ_COMMON */

#ifdef CONFIG_NO_HZ_FULL
-bool sched_can_stop_tick(void)
bool sched_can_stop_tick(struct rq *rq)
{
int fifo_nr_running;

/* Deadline tasks, even if single, need the tick */
if (rq->dl.dl_nr_running)
return false;

/*
-* FIFO realtime policy runs the highest priority task. Other runnable
-* tasks are of a lower priority. The scheduler tick does nothing.
* FIFO realtime policy runs the highest priority task (after DEADLINE).
* Other runnable tasks are of a lower priority. The scheduler tick
* isn't needed.
*/
-if (current->policy == SCHED_FIFO)
fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
if (fifo_nr_running)
return true;

/*
* Round-robin realtime tasks time slice with other tasks at the same
-* realtime priority. Is this task the only one at this priority?
* realtime priority.
*/
-if (current->policy == SCHED_RR) {
-struct sched_rt_entity *rt_se = &current->rt;
-return list_is_singular(&rt_se->run_list);
if (rq->rt.rr_nr_running) {
if (rq->rt.rr_nr_running == 1)
return true;
else
return false;
}

-/*
-* More than one running task need preemption.
-* nr_running update is assumed to be visible
-* after IPI is sent from wakers.
-*/
-if (this_rq()->nr_running > 1)
/* Normal multitasking need periodic preemption checks */
if (rq->cfs.nr_running > 1)
return false;

return true;
...
kernel/sched/rt.c

@@ -1149,6 +1149,20 @@ unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
return 1;
}

static inline
unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
{
struct rt_rq *group_rq = group_rt_rq(rt_se);
struct task_struct *tsk;
if (group_rq)
return group_rq->rr_nr_running;
tsk = rt_task_of(rt_se);
return (tsk->policy == SCHED_RR) ? 1 : 0;
}
static inline
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{

@@ -1156,6 +1170,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
WARN_ON(!rt_prio(prio));
rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
inc_rt_prio(rt_rq, prio);
inc_rt_migration(rt_se, rt_rq);

@@ -1168,6 +1183,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
WARN_ON(!rt_rq->rt_nr_running);
rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
dec_rt_prio(rt_rq, rt_se_prio(rt_se));
dec_rt_migration(rt_se, rt_rq);
...
kernel/sched/sched.h

@@ -450,6 +450,7 @@ static inline int rt_bandwidth_enabled(void)
struct rt_rq {
struct rt_prio_array active;
unsigned int rt_nr_running;
unsigned int rr_nr_running;
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
struct {
int curr; /* highest queued rt task prio */

@@ -1313,6 +1314,35 @@ unsigned long to_ratio(u64 period, u64 runtime);
extern void init_entity_runnable_average(struct sched_entity *se);

#ifdef CONFIG_NO_HZ_FULL
extern bool sched_can_stop_tick(struct rq *rq);
/*
* Tick may be needed by tasks in the runqueue depending on their policy and
* requirements. If tick is needed, lets send the target an IPI to kick it out of
* nohz mode if necessary.
*/
static inline void sched_update_tick_dependency(struct rq *rq)
{
int cpu;
if (!tick_nohz_full_enabled())
return;
cpu = cpu_of(rq);
if (!tick_nohz_full_cpu(cpu))
return;
if (sched_can_stop_tick(rq))
tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
else
tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
}
#else
static inline void sched_update_tick_dependency(struct rq *rq) { }
#endif
static inline void add_nr_running(struct rq *rq, unsigned count)
{
unsigned prev_nr = rq->nr_running;

@@ -1324,26 +1354,16 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
if (!rq->rd->overload)
rq->rd->overload = true;
#endif

-#ifdef CONFIG_NO_HZ_FULL
-if (tick_nohz_full_cpu(rq->cpu)) {
-/*
-* Tick is needed if more than one task runs on a CPU.
-* Send the target an IPI to kick it out of nohz mode.
-*
-* We assume that IPI implies full memory barrier and the
-* new value of rq->nr_running is visible on reception
-* from the target.
-*/
-tick_nohz_full_kick_cpu(rq->cpu);
-}
-#endif
}
sched_update_tick_dependency(rq);
}

static inline void sub_nr_running(struct rq *rq, unsigned count)
{
rq->nr_running -= count;
/* Check if we still need preemption */
sched_update_tick_dependency(rq);
}

static inline void rq_last_tick_reset(struct rq *rq)
...
kernel/time/posix-cpu-timers.c

@@ -333,7 +333,6 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
return err;
}

/*
* Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
* This is called from sys_timer_create() and do_cpu_nanosleep() with the

@@ -517,6 +516,10 @@ static void arm_timer(struct k_itimer *timer)
cputime_expires->sched_exp = exp;
break;
}

if (CPUCLOCK_PERTHREAD(timer->it_clock))
tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
else
tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
}
}

@@ -582,39 +585,6 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
return 0;
}

-#ifdef CONFIG_NO_HZ_FULL
-static void nohz_kick_work_fn(struct work_struct *work)
-{
-tick_nohz_full_kick_all();
-}

-static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);

-/*
-* We need the IPIs to be sent from sane process context.
-* The posix cpu timers are always set with irqs disabled.
-*/
-static void posix_cpu_timer_kick_nohz(void)
-{
-if (context_tracking_is_enabled())
-schedule_work(&nohz_kick_work);
-}

-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
-{
-if (!task_cputime_zero(&tsk->cputime_expires))
-return false;

-/* Check if cputimer is running. This is accessed without locking. */
-if (READ_ONCE(tsk->signal->cputimer.running))
-return false;

-return true;
-}
-#else
-static inline void posix_cpu_timer_kick_nohz(void) { }
-#endif

/*
* Guts of sys_timer_settime for CPU timers.
* This is called with the timer locked and interrupts disabled.

@@ -761,8 +731,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
sample_to_timespec(timer->it_clock,
old_incr, &old->it_interval);
}

-if (!ret)
-posix_cpu_timer_kick_nohz();
return ret;
}

@@ -911,6 +880,8 @@ static void check_thread_timers(struct task_struct *tsk,
__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
}
}
if (task_cputime_zero(tsk_expires))
tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
}

static inline void stop_process_timers(struct signal_struct *sig)

@@ -919,6 +890,7 @@ static inline void stop_process_timers(struct signal_struct *sig)
/* Turn off cputimer->running. This is done without locking. */
WRITE_ONCE(cputimer->running, false);
tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
}

static u32 onecputick;

@@ -1095,8 +1067,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
arm_timer(timer);
unlock_task_sighand(p, &flags);

-/* Kick full dynticks CPUs in case they need to tick on the new timer */
-posix_cpu_timer_kick_nohz();
out:
timer->it_overrun_last = timer->it_overrun;
timer->it_overrun = -1;

@@ -1270,7 +1240,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
}

if (!*newval)
-goto out;
return;

*newval += now;
}
@@ -1288,8 +1258,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
tsk->signal->cputime_expires.virt_exp = *newval;
break;
}

-out:
-posix_cpu_timer_kick_nohz();
tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
}

static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
...
kernel/time/tick-sched.c

@@ -22,7 +22,6 @@
#include <linux/module.h>
#include <linux/irq_work.h>
#include <linux/posix-timers.h>
-#include <linux/perf_event.h>
#include <linux/context_tracking.h>

#include <asm/irq_regs.h>

@@ -158,54 +157,63 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
cpumask_var_t tick_nohz_full_mask;
cpumask_var_t housekeeping_mask;
bool tick_nohz_full_running;
static unsigned long tick_dep_mask;

-static bool can_stop_full_tick(void)
static void trace_tick_dependency(unsigned long dep)
{
if (dep & TICK_DEP_MASK_POSIX_TIMER) {
trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
return;
}
if (dep & TICK_DEP_MASK_PERF_EVENTS) {
trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
return;
}
if (dep & TICK_DEP_MASK_SCHED) {
trace_tick_stop(0, TICK_DEP_MASK_SCHED);
return;
}
if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE)
trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
}
static bool can_stop_full_tick(struct tick_sched *ts)
{
WARN_ON_ONCE(!irqs_disabled());

-if (!sched_can_stop_tick()) {
-trace_tick_stop(0, "more than 1 task in runqueue\n");
if (tick_dep_mask) {
trace_tick_dependency(tick_dep_mask);
return false;
}

-if (!posix_cpu_timers_can_stop_tick(current)) {
-trace_tick_stop(0, "posix timers running\n");
if (ts->tick_dep_mask) {
trace_tick_dependency(ts->tick_dep_mask);
return false;
}

-if (!perf_event_can_stop_tick()) {
-trace_tick_stop(0, "perf events running\n");
if (current->tick_dep_mask) {
trace_tick_dependency(current->tick_dep_mask);
return false;
}

-/* sched_clock_tick() needs us? */
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-/*
-* TODO: kick full dynticks CPUs when
-* sched_clock_stable is set.
-*/
-if (!sched_clock_stable()) {
-trace_tick_stop(0, "unstable sched clock\n");
-/*
-* Don't allow the user to think they can get
-* full NO_HZ with this machine.
-*/
-WARN_ONCE(tick_nohz_full_running,
-"NO_HZ FULL will not work with unstable sched clock");
if (current->signal->tick_dep_mask) {
trace_tick_dependency(current->signal->tick_dep_mask);
return false;
}
-#endif

return true;
}

-static void nohz_full_kick_work_func(struct irq_work *work)
static void nohz_full_kick_func(struct irq_work *work)
{
/* Empty, the tick restart happens on tick_nohz_irq_exit() */
}

static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
-.func = nohz_full_kick_work_func,
.func = nohz_full_kick_func,
};

/*
@@ -214,7 +222,7 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
* This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
* is NMI safe.
*/
-void tick_nohz_full_kick(void)
static void tick_nohz_full_kick(void)
{
if (!tick_nohz_full_cpu(smp_processor_id()))
return;

@@ -234,27 +242,112 @@ void tick_nohz_full_kick_cpu(int cpu)
irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
}

-static void nohz_full_kick_ipi(void *info)
-{
-/* Empty, the tick restart happens on tick_nohz_irq_exit() */
-}

/*
* Kick all full dynticks CPUs in order to force these to re-evaluate
* their dependency on the tick and restart it if necessary.
*/
-void tick_nohz_full_kick_all(void)
static void tick_nohz_full_kick_all(void)
{
int cpu;

if (!tick_nohz_full_running)
return;

preempt_disable();
-smp_call_function_many(tick_nohz_full_mask,
-nohz_full_kick_ipi, NULL, false);
-tick_nohz_full_kick();
for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
tick_nohz_full_kick_cpu(cpu);
preempt_enable();
}
static void tick_nohz_dep_set_all(unsigned long *dep,
enum tick_dep_bits bit)
{
unsigned long prev;
prev = fetch_or(dep, BIT_MASK(bit));
if (!prev)
tick_nohz_full_kick_all();
}
/*
* Set a global tick dependency. Used by perf events that rely on freq and
* by unstable clock.
*/
void tick_nohz_dep_set(enum tick_dep_bits bit)
{
tick_nohz_dep_set_all(&tick_dep_mask, bit);
}
void tick_nohz_dep_clear(enum tick_dep_bits bit)
{
clear_bit(bit, &tick_dep_mask);
}
/*
* Set per-CPU tick dependency. Used by scheduler and perf events in order to
* manage events throttling.
*/
void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
{
unsigned long prev;
struct tick_sched *ts;
ts = per_cpu_ptr(&tick_cpu_sched, cpu);
prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit));
if (!prev) {
preempt_disable();
/* Perf needs local kick that is NMI safe */
if (cpu == smp_processor_id()) {
tick_nohz_full_kick();
} else {
/* Remote irq work not NMI-safe */
if (!WARN_ON_ONCE(in_nmi()))
tick_nohz_full_kick_cpu(cpu);
}
preempt_enable();
}
}
void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
clear_bit(bit, &ts->tick_dep_mask);
}
/*
* Set a per-task tick dependency. Posix CPU timers need this in order to elapse
* per task timers.
*/
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
/*
* We could optimize this with just kicking the target running the task
* if that noise matters for nohz full users.
*/
tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
}
void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
clear_bit(bit, &tsk->tick_dep_mask);
}
/*
* Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
* per process timers.
*/
void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
{
tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
}
void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
{
clear_bit(bit, &sig->tick_dep_mask);
}
/*
* Re-evaluate the need for the tick as we switch the current task.
* It might need the tick due to per task/process properties:

@@ -263,15 +356,19 @@ void tick_nohz_full_kick_all(void)
void __tick_nohz_task_switch(void)
{
unsigned long flags;
struct tick_sched *ts;

local_irq_save(flags);

if (!tick_nohz_full_cpu(smp_processor_id()))
goto out;

-if (tick_nohz_tick_stopped() && !can_stop_full_tick())
-tick_nohz_full_kick();
ts = this_cpu_ptr(&tick_cpu_sched);
if (ts->tick_stopped) {
if (current->tick_dep_mask || current->signal->tick_dep_mask)
tick_nohz_full_kick();
}
out:
local_irq_restore(flags);
}

@@ -689,7 +786,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);

ts->tick_stopped = 1;
-trace_tick_stop(1, " ");
trace_tick_stop(1, TICK_DEP_MASK_NONE);
}

/*

@@ -740,7 +837,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
return;

-if (can_stop_full_tick())
if (can_stop_full_tick(ts))
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
...
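One detail worth calling out in tick_nohz_dep_set_all() above: CPUs are kicked only when fetch_or() reports that the dependency mask was previously empty, since a non-empty mask means nohz_full CPUs are already keeping their tick. A hedged sketch of that pattern with made-up names (the real code kicks via tick_nohz_full_kick_all() rather than printing):

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/printk.h>
#include <linux/tick.h>

/* Illustrative only: mirror of the "kick only on the first dependency" idea. */
static void example_dep_set(unsigned long *mask, enum tick_dep_bits bit)
{
	unsigned long prev;

	/* Atomically OR the bit in and fetch the mask's previous contents. */
	prev = fetch_or(mask, BIT_MASK(bit));

	/*
	 * Only the empty -> non-empty transition needs an IPI; if any bit was
	 * already set, the tick is already being retained.
	 */
	if (!prev)
		pr_debug("first tick dependency set; nohz_full CPUs would be kicked\n");
}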
kernel/time/tick-sched.h

@@ -60,6 +60,7 @@ struct tick_sched {
u64 next_timer;
ktime_t idle_expires;
int do_timer_last;
unsigned long tick_dep_mask;
};

extern struct tick_sched *tick_get_tick_sched(int cpu);
...