Commit e23604ed authored by Linus Torvalds

Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull NOHZ updates from Ingo Molnar:
 "NOHZ enhancements, by Frederic Weisbecker, which reorganizes/refactors
  the NOHZ 'can the tick be stopped?' infrastructure and related code to
  be data driven, and harmonizes the naming and handling of all the
  various properties"

[ This makes the ugly "fetch_or()" macro that the scheduler used
  internally a new generic helper, and does a bad job at it.

  I'm pulling it, but I've asked Ingo and Frederic to get this
  fixed up ]
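
For context: fetch_or() atomically performs *ptr |= mask and returns the old
value of *ptr. A minimal user-space sketch of equivalent semantics, using C11
atomics instead of the kernel's cmpxchg() loop (illustrative only, not kernel
code; the demo names are made up):

#include <stdatomic.h>
#include <stdio.h>

/* Analogue of fetch_or(): atomically OR a mask into a word and return
 * the word's prior value. C11 provides this as a primitive; the kernel
 * macro open-codes it as a cmpxchg() retry loop so it works on any
 * integer type. */
static unsigned long demo_fetch_or(atomic_ulong *ptr, unsigned long mask)
{
	return atomic_fetch_or(ptr, mask);
}

int main(void)
{
	atomic_ulong word = 0x1;
	unsigned long old = demo_fetch_or(&word, 0x4);

	/* Prints: old=0x1 new=0x5 */
	printf("old=%#lx new=%#lx\n", old, (unsigned long)atomic_load(&word));
	return 0;
}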

* 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched-clock: Migrate to use new tick dependency mask model
  posix-cpu-timers: Migrate to use new tick dependency mask model
  sched: Migrate sched to use new tick dependency mask model
  sched: Account rr tasks
  perf: Migrate perf to use new tick dependency mask model
  nohz: Use enum code for tick stop failure tracing message
  nohz: New tick dependency mask
  nohz: Implement wide kick on top of irq work
  atomic: Export fetch_or()
parents d4e79615 1f251846
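
The series below replaces the old per-subsystem "can the tick be stopped?"
callbacks with dependency bits that each subsystem sets and clears. A sketch
of the resulting usage pattern — the tick_dep_* helpers and the
TICK_DEP_BIT_* constants are the ones added in this series, while the
enclosing my_subsys_*() functions are hypothetical:

/* Hypothetical subsystem code showing the new dependency-mask API. */
static void my_subsys_start_periodic_work(int cpu)
{
	/* This CPU now needs the periodic tick; the helper kicks it out
	 * of nohz mode if its tick was already stopped. */
	tick_dep_set_cpu(cpu, TICK_DEP_BIT_PERF_EVENTS);
}

static void my_subsys_stop_periodic_work(int cpu)
{
	/* Dependency dropped; the CPU may stop its tick again the next
	 * time can_stop_full_tick() is evaluated. */
	tick_dep_clear_cpu(cpu, TICK_DEP_BIT_PERF_EVENTS);
}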
@@ -548,6 +548,27 @@ static inline int atomic_dec_if_positive(atomic_t *v)
}
#endif
/**
* fetch_or - perform *ptr |= mask and return old value of *ptr
* @ptr: pointer to value
* @mask: mask to OR on the value
*
* cmpxchg based fetch_or, macro so it works for different integer types
*/
#ifndef fetch_or
#define fetch_or(ptr, mask) \
({ typeof(*(ptr)) __old, __val = *(ptr); \
for (;;) { \
__old = cmpxchg((ptr), __val, __val | (mask)); \
if (__old == __val) \
break; \
__val = __old; \
} \
__old; \
})
#endif
#ifdef CONFIG_GENERIC_ATOMIC64
#include <asm-generic/atomic64.h>
#endif
@@ -1110,12 +1110,6 @@ static inline void perf_event_task_tick(void) { }
static inline int perf_event_release_kernel(struct perf_event *event) { return 0; }
#endif
-#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_NO_HZ_FULL)
-extern bool perf_event_can_stop_tick(void);
-#else
-static inline bool perf_event_can_stop_tick(void) { return true; }
-#endif
#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
extern void perf_restore_debug_store(void);
#else
@@ -128,9 +128,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer);
void run_posix_cpu_timers(struct task_struct *task);
void posix_cpu_timers_exit(struct task_struct *task);
void posix_cpu_timers_exit_group(struct task_struct *task);
-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk);
void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
cputime_t *newval, cputime_t *oldval);
@@ -717,6 +717,10 @@ struct signal_struct {
/* Earliest-expiration cache. */
struct task_cputime cputime_expires;
#ifdef CONFIG_NO_HZ_FULL
unsigned long tick_dep_mask;
#endif
struct list_head cpu_timers[3];
struct pid *tty_old_pgrp;
@@ -1542,6 +1546,10 @@ struct task_struct {
VTIME_SYS,
} vtime_snap_whence;
#endif
#ifdef CONFIG_NO_HZ_FULL
unsigned long tick_dep_mask;
#endif
unsigned long nvcsw, nivcsw; /* context switch counts */
u64 start_time; /* monotonic time in nsec */
u64 real_start_time; /* boot based time in nsec */
@@ -2356,10 +2364,7 @@ static inline void wake_up_nohz_cpu(int cpu) { }
#endif
#ifdef CONFIG_NO_HZ_FULL
-extern bool sched_can_stop_tick(void);
extern u64 scheduler_tick_max_deferment(void);
-#else
-static inline bool sched_can_stop_tick(void) { return false; }
#endif
#ifdef CONFIG_SCHED_AUTOGROUP
@@ -97,6 +97,19 @@ static inline void tick_broadcast_exit(void)
tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
}
enum tick_dep_bits {
TICK_DEP_BIT_POSIX_TIMER = 0,
TICK_DEP_BIT_PERF_EVENTS = 1,
TICK_DEP_BIT_SCHED = 2,
TICK_DEP_BIT_CLOCK_UNSTABLE = 3
};
#define TICK_DEP_MASK_NONE 0
#define TICK_DEP_MASK_POSIX_TIMER (1 << TICK_DEP_BIT_POSIX_TIMER)
#define TICK_DEP_MASK_PERF_EVENTS (1 << TICK_DEP_BIT_PERF_EVENTS)
#define TICK_DEP_MASK_SCHED (1 << TICK_DEP_BIT_SCHED)
#define TICK_DEP_MASK_CLOCK_UNSTABLE (1 << TICK_DEP_BIT_CLOCK_UNSTABLE)
#ifdef CONFIG_NO_HZ_COMMON
extern int tick_nohz_enabled;
extern int tick_nohz_tick_stopped(void);
@@ -154,9 +167,73 @@ static inline int housekeeping_any_cpu(void)
return cpumask_any_and(housekeeping_mask, cpu_online_mask);
}
-extern void tick_nohz_full_kick(void);
extern void tick_nohz_dep_set(enum tick_dep_bits bit);
extern void tick_nohz_dep_clear(enum tick_dep_bits bit);
extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit);
extern void tick_nohz_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit);
extern void tick_nohz_dep_set_signal(struct signal_struct *signal,
enum tick_dep_bits bit);
extern void tick_nohz_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit);
/*
* The below are tick_nohz_[set,clear]_dep() wrappers that optimize off-cases
* on top of static keys.
*/
static inline void tick_dep_set(enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_set(bit);
}
static inline void tick_dep_clear(enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_clear(bit);
}
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit)
{
if (tick_nohz_full_cpu(cpu))
tick_nohz_dep_set_cpu(cpu, bit);
}
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
if (tick_nohz_full_cpu(cpu))
tick_nohz_dep_clear_cpu(cpu, bit);
}
static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_set_task(tsk, bit);
}
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_clear_task(tsk, bit);
}
static inline void tick_dep_set_signal(struct signal_struct *signal,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_set_signal(signal, bit);
}
static inline void tick_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_clear_signal(signal, bit);
}
extern void tick_nohz_full_kick_cpu(int cpu);
-extern void tick_nohz_full_kick_all(void);
extern void __tick_nohz_task_switch(void);
#else
static inline int housekeeping_any_cpu(void)
@@ -166,9 +243,21 @@ static inline int housekeeping_any_cpu(void)
static inline bool tick_nohz_full_enabled(void) { return false; }
static inline bool tick_nohz_full_cpu(int cpu) { return false; }
static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { }
static inline void tick_dep_set(enum tick_dep_bits bit) { }
static inline void tick_dep_clear(enum tick_dep_bits bit) { }
static inline void tick_dep_set_cpu(int cpu, enum tick_dep_bits bit) { }
static inline void tick_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { }
static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_set_signal(struct signal_struct *signal,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_signal(struct signal_struct *signal,
enum tick_dep_bits bit) { }
static inline void tick_nohz_full_kick_cpu(int cpu) { }
-static inline void tick_nohz_full_kick(void) { }
-static inline void tick_nohz_full_kick_all(void) { }
static inline void __tick_nohz_task_switch(void) { }
#endif
@@ -328,23 +328,49 @@ TRACE_EVENT(itimer_expire,
);
#ifdef CONFIG_NO_HZ_COMMON
#define TICK_DEP_NAMES \
tick_dep_name(NONE) \
tick_dep_name(POSIX_TIMER) \
tick_dep_name(PERF_EVENTS) \
tick_dep_name(SCHED) \
tick_dep_name_end(CLOCK_UNSTABLE)
#undef tick_dep_name
#undef tick_dep_name_end
#define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
#define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep);
TICK_DEP_NAMES
#undef tick_dep_name
#undef tick_dep_name_end
#define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep },
#define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep }
#define show_tick_dep_name(val) \
__print_symbolic(val, TICK_DEP_NAMES)
TRACE_EVENT(tick_stop,
-	TP_PROTO(int success, char *error_msg),
+	TP_PROTO(int success, int dependency),
-	TP_ARGS(success, error_msg),
+	TP_ARGS(success, dependency),
	TP_STRUCT__entry(
		__field( int ,	success	)
-		__string( msg, error_msg )
+		__field( int ,	dependency )
	),
	TP_fast_assign(
		__entry->success = success;
-		__assign_str(msg, error_msg);
+		__entry->dependency = dependency;
	),
-	TP_printk("success=%s msg=%s", __entry->success ? "yes" : "no", __get_str(msg))
+	TP_printk("success=%d dependency=%s", __entry->success, \
+		show_tick_dep_name(__entry->dependency))
);
#endif
@@ -3112,17 +3112,6 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx)
return rotate;
}
-#ifdef CONFIG_NO_HZ_FULL
-bool perf_event_can_stop_tick(void)
-{
-	if (atomic_read(&nr_freq_events) ||
-	    __this_cpu_read(perf_throttled_count))
-		return false;
-	else
-		return true;
-}
-#endif
void perf_event_task_tick(void)
{
struct list_head *head = this_cpu_ptr(&active_ctx_list);
@@ -3133,6 +3122,7 @@ void perf_event_task_tick(void)
__this_cpu_inc(perf_throttled_seq);
throttled = __this_cpu_xchg(perf_throttled_count, 0);
tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
perf_adjust_freq_unthr_context(ctx, throttled);
@@ -3564,6 +3554,28 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
atomic_dec(&per_cpu(perf_cgroup_events, cpu));
}
#ifdef CONFIG_NO_HZ_FULL
static DEFINE_SPINLOCK(nr_freq_lock);
#endif
static void unaccount_freq_event_nohz(void)
{
#ifdef CONFIG_NO_HZ_FULL
spin_lock(&nr_freq_lock);
if (atomic_dec_and_test(&nr_freq_events))
tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
spin_unlock(&nr_freq_lock);
#endif
}
static void unaccount_freq_event(void)
{
if (tick_nohz_full_enabled())
unaccount_freq_event_nohz();
else
atomic_dec(&nr_freq_events);
}
static void unaccount_event(struct perf_event *event)
{
bool dec = false;
@@ -3580,7 +3592,7 @@ static void unaccount_event(struct perf_event *event)
if (event->attr.task)
atomic_dec(&nr_task_events);
if (event->attr.freq)
-		atomic_dec(&nr_freq_events);
+		unaccount_freq_event();
if (event->attr.context_switch) {
dec = true;
atomic_dec(&nr_switch_events);
@@ -6424,9 +6436,9 @@ static int __perf_event_overflow(struct perf_event *event,
if (unlikely(throttle
&& hwc->interrupts >= max_samples_per_tick)) {
__this_cpu_inc(perf_throttled_count);
+		tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
		hwc->interrupts = MAX_INTERRUPTS;
		perf_log_throttle(event, 0);
-		tick_nohz_full_kick();
ret = 1;
}
}
@@ -7815,6 +7827,27 @@ static void account_event_cpu(struct perf_event *event, int cpu)
atomic_inc(&per_cpu(perf_cgroup_events, cpu));
}
/* Freq events need the tick to stay alive (see perf_event_task_tick). */
static void account_freq_event_nohz(void)
{
#ifdef CONFIG_NO_HZ_FULL
/* Lock so we don't race with concurrent unaccount */
spin_lock(&nr_freq_lock);
if (atomic_inc_return(&nr_freq_events) == 1)
tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
spin_unlock(&nr_freq_lock);
#endif
}
static void account_freq_event(void)
{
if (tick_nohz_full_enabled())
account_freq_event_nohz();
else
atomic_inc(&nr_freq_events);
}
static void account_event(struct perf_event *event)
{
bool inc = false;
@@ -7830,10 +7863,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_comm_events);
if (event->attr.task)
atomic_inc(&nr_task_events);
-	if (event->attr.freq) {
-		if (atomic_inc_return(&nr_freq_events) == 1)
-			tick_nohz_full_kick_all();
-	}
+	if (event->attr.freq)
+		account_freq_event();
if (event->attr.context_switch) {
atomic_inc(&nr_switch_events);
inc = true;
@@ -61,6 +61,7 @@
#include <linux/static_key.h>
#include <linux/workqueue.h>
#include <linux/compiler.h>
#include <linux/tick.h>
/*
* Scheduler clock - returns current time in nanosec units.
@@ -89,6 +90,8 @@ static void __set_sched_clock_stable(void)
{
if (!sched_clock_stable())
static_key_slow_inc(&__sched_clock_stable);
tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
}
void set_sched_clock_stable(void)
@@ -108,6 +111,8 @@ static void __clear_sched_clock_stable(struct work_struct *work)
/* XXX worry about clock continuity */
if (sched_clock_stable())
static_key_slow_dec(&__sched_clock_stable);
tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
}
static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);
@@ -320,20 +320,6 @@ static inline void init_hrtick(void)
}
#endif /* CONFIG_SCHED_HRTICK */
-/*
- * cmpxchg based fetch_or, macro so it works for different integer types
- */
-#define fetch_or(ptr, val)						\
-({	typeof(*(ptr)) __old, __val = *(ptr);				\
-	for (;;) {							\
-		__old = cmpxchg((ptr), __val, __val | (val));		\
-		if (__old == __val)					\
-			break;						\
-		__val = __old;						\
-	}								\
-	__old;								\
-})
#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
/*
* Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
@@ -582,31 +568,36 @@ static inline bool got_nohz_idle_kick(void)
#endif /* CONFIG_NO_HZ_COMMON */
#ifdef CONFIG_NO_HZ_FULL
-bool sched_can_stop_tick(void)
+bool sched_can_stop_tick(struct rq *rq)
{
+	int fifo_nr_running;
+
+	/* Deadline tasks, even if single, need the tick */
+	if (rq->dl.dl_nr_running)
+		return false;
	/*
-	 * FIFO realtime policy runs the highest priority task. Other runnable
-	 * tasks are of a lower priority. The scheduler tick does nothing.
+	 * FIFO realtime policy runs the highest priority task (after DEADLINE).
+	 * Other runnable tasks are of a lower priority. The scheduler tick
+	 * isn't needed.
	 */
-	if (current->policy == SCHED_FIFO)
+	fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
+	if (fifo_nr_running)
		return true;
	/*
	 * Round-robin realtime tasks time slice with other tasks at the same
-	 * realtime priority. Is this task the only one at this priority?
+	 * realtime priority.
	 */
-	if (current->policy == SCHED_RR) {
-		struct sched_rt_entity *rt_se = &current->rt;
-
-		return list_is_singular(&rt_se->run_list);
+	if (rq->rt.rr_nr_running) {
+		if (rq->rt.rr_nr_running == 1)
+			return true;
+		else
+			return false;
	}
-	/*
-	 * More than one running task need preemption.
-	 * nr_running update is assumed to be visible
-	 * after IPI is sent from wakers.
-	 */
-	if (this_rq()->nr_running > 1)
+	/* Normal multitasking need periodic preemption checks */
+	if (rq->cfs.nr_running > 1)
		return false;
	return true;
@@ -1149,6 +1149,20 @@ unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
return 1;
}
static inline
unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
{
struct rt_rq *group_rq = group_rt_rq(rt_se);
struct task_struct *tsk;
if (group_rq)
return group_rq->rr_nr_running;
tsk = rt_task_of(rt_se);
return (tsk->policy == SCHED_RR) ? 1 : 0;
}
static inline
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
@@ -1156,6 +1170,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
WARN_ON(!rt_prio(prio));
rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
inc_rt_prio(rt_rq, prio);
inc_rt_migration(rt_se, rt_rq);
@@ -1168,6 +1183,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
WARN_ON(!rt_rq->rt_nr_running);
rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
dec_rt_prio(rt_rq, rt_se_prio(rt_se));
dec_rt_migration(rt_se, rt_rq);
@@ -450,6 +450,7 @@ static inline int rt_bandwidth_enabled(void)
struct rt_rq {
struct rt_prio_array active;
unsigned int rt_nr_running;
unsigned int rr_nr_running;
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
struct {
int curr; /* highest queued rt task prio */
@@ -1313,6 +1314,35 @@ unsigned long to_ratio(u64 period, u64 runtime);
extern void init_entity_runnable_average(struct sched_entity *se);
#ifdef CONFIG_NO_HZ_FULL
extern bool sched_can_stop_tick(struct rq *rq);
/*
* Tick may be needed by tasks in the runqueue depending on their policy and
* requirements. If tick is needed, lets send the target an IPI to kick it out of
* nohz mode if necessary.
*/
static inline void sched_update_tick_dependency(struct rq *rq)
{
int cpu;
if (!tick_nohz_full_enabled())
return;
cpu = cpu_of(rq);
if (!tick_nohz_full_cpu(cpu))
return;
if (sched_can_stop_tick(rq))
tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
else
tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
}
#else
static inline void sched_update_tick_dependency(struct rq *rq) { }
#endif
static inline void add_nr_running(struct rq *rq, unsigned count)
{
unsigned prev_nr = rq->nr_running;
@@ -1324,26 +1354,16 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
if (!rq->rd->overload)
rq->rd->overload = true;
#endif
-#ifdef CONFIG_NO_HZ_FULL
-	if (tick_nohz_full_cpu(rq->cpu)) {
-		/*
-		 * Tick is needed if more than one task runs on a CPU.
-		 * Send the target an IPI to kick it out of nohz mode.
-		 *
-		 * We assume that IPI implies full memory barrier and the
-		 * new value of rq->nr_running is visible on reception
-		 * from the target.
-		 */
-		tick_nohz_full_kick_cpu(rq->cpu);
-	}
-#endif
	}
+	sched_update_tick_dependency(rq);
}
static inline void sub_nr_running(struct rq *rq, unsigned count)
{
rq->nr_running -= count;
/* Check if we still need preemption */
sched_update_tick_dependency(rq);
}
static inline void rq_last_tick_reset(struct rq *rq)
@@ -333,7 +333,6 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
return err;
}
/*
* Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
* This is called from sys_timer_create() and do_cpu_nanosleep() with the
@@ -517,6 +516,10 @@ static void arm_timer(struct k_itimer *timer)
cputime_expires->sched_exp = exp;
break;
}
if (CPUCLOCK_PERTHREAD(timer->it_clock))
tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
else
tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
}
}
@@ -582,39 +585,6 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
return 0;
}
-#ifdef CONFIG_NO_HZ_FULL
-static void nohz_kick_work_fn(struct work_struct *work)
-{
-	tick_nohz_full_kick_all();
-}
-static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
-/*
- * We need the IPIs to be sent from sane process context.
- * The posix cpu timers are always set with irqs disabled.
- */
-static void posix_cpu_timer_kick_nohz(void)
-{
-	if (context_tracking_is_enabled())
-		schedule_work(&nohz_kick_work);
-}
-bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
-{
-	if (!task_cputime_zero(&tsk->cputime_expires))
-		return false;
-	/* Check if cputimer is running. This is accessed without locking. */
-	if (READ_ONCE(tsk->signal->cputimer.running))
-		return false;
-	return true;
-}
-#else
-static inline void posix_cpu_timer_kick_nohz(void) { }
-#endif
/*
* Guts of sys_timer_settime for CPU timers.
* This is called with the timer locked and interrupts disabled.
@@ -761,8 +731,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
sample_to_timespec(timer->it_clock,
old_incr, &old->it_interval);
}
-	if (!ret)
-		posix_cpu_timer_kick_nohz();
return ret;
}
@@ -911,6 +880,8 @@ static void check_thread_timers(struct task_struct *tsk,
__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
}
}
if (task_cputime_zero(tsk_expires))
tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
}
static inline void stop_process_timers(struct signal_struct *sig)
@@ -919,6 +890,7 @@ static inline void stop_process_timers(struct signal_struct *sig)
/* Turn off cputimer->running. This is done without locking. */
WRITE_ONCE(cputimer->running, false);
tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
}
static u32 onecputick;
@@ -1095,8 +1067,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
arm_timer(timer);
unlock_task_sighand(p, &flags);
-	/* Kick full dynticks CPUs in case they need to tick on the new timer */
-	posix_cpu_timer_kick_nohz();
out:
timer->it_overrun_last = timer->it_overrun;
timer->it_overrun = -1;
@@ -1270,7 +1240,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
}
if (!*newval)
-		goto out;
+		return;
*newval += now;
}
@@ -1288,8 +1258,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
tsk->signal->cputime_expires.virt_exp = *newval;
break;
}
-out:
-	posix_cpu_timer_kick_nohz();
+	tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
}
static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
@@ -22,7 +22,6 @@
#include <linux/module.h>
#include <linux/irq_work.h>
#include <linux/posix-timers.h>
-#include <linux/perf_event.h>
#include <linux/context_tracking.h>
#include <asm/irq_regs.h>
@@ -158,54 +157,63 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
cpumask_var_t tick_nohz_full_mask;
cpumask_var_t housekeeping_mask;
bool tick_nohz_full_running;
+static unsigned long tick_dep_mask;
-static bool can_stop_full_tick(void)
+static void trace_tick_dependency(unsigned long dep)
+{
+	if (dep & TICK_DEP_MASK_POSIX_TIMER) {
+		trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
+		return;
+	}
+	if (dep & TICK_DEP_MASK_PERF_EVENTS) {
+		trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
+		return;
+	}
+	if (dep & TICK_DEP_MASK_SCHED) {
+		trace_tick_stop(0, TICK_DEP_MASK_SCHED);
+		return;
+	}
+	if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE)
+		trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
+}
+
+static bool can_stop_full_tick(struct tick_sched *ts)
{
	WARN_ON_ONCE(!irqs_disabled());
-	if (!sched_can_stop_tick()) {
-		trace_tick_stop(0, "more than 1 task in runqueue\n");
+	if (tick_dep_mask) {
+		trace_tick_dependency(tick_dep_mask);
		return false;
	}
-	if (!posix_cpu_timers_can_stop_tick(current)) {
-		trace_tick_stop(0, "posix timers running\n");
+	if (ts->tick_dep_mask) {
+		trace_tick_dependency(ts->tick_dep_mask);
		return false;
	}
-	if (!perf_event_can_stop_tick()) {
-		trace_tick_stop(0, "perf events running\n");
+	if (current->tick_dep_mask) {
+		trace_tick_dependency(current->tick_dep_mask);
		return false;
	}
-	/* sched_clock_tick() needs us? */
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-	/*
-	 * TODO: kick full dynticks CPUs when
-	 * sched_clock_stable is set.
-	 */
-	if (!sched_clock_stable()) {
-		trace_tick_stop(0, "unstable sched clock\n");
-		/*
-		 * Don't allow the user to think they can get
-		 * full NO_HZ with this machine.
-		 */
-		WARN_ONCE(tick_nohz_full_running,
-			  "NO_HZ FULL will not work with unstable sched clock");
+	if (current->signal->tick_dep_mask) {
+		trace_tick_dependency(current->signal->tick_dep_mask);
		return false;
	}
-#endif
	return true;
}
-static void nohz_full_kick_work_func(struct irq_work *work)
+static void nohz_full_kick_func(struct irq_work *work)
{
	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
}
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
-	.func = nohz_full_kick_work_func,
+	.func = nohz_full_kick_func,
};
/*
@@ -214,7 +222,7 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
* This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
* is NMI safe.
*/
-void tick_nohz_full_kick(void)
+static void tick_nohz_full_kick(void)
{
if (!tick_nohz_full_cpu(smp_processor_id()))
return;
@@ -234,25 +242,110 @@ void tick_nohz_full_kick_cpu(int cpu)
irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
}
-static void nohz_full_kick_ipi(void *info)
-{
-	/* Empty, the tick restart happens on tick_nohz_irq_exit() */
-}
/*
* Kick all full dynticks CPUs in order to force these to re-evaluate
* their dependency on the tick and restart it if necessary.
*/
-void tick_nohz_full_kick_all(void)
+static void tick_nohz_full_kick_all(void)
{
int cpu;
if (!tick_nohz_full_running)
return;
preempt_disable();
-	smp_call_function_many(tick_nohz_full_mask,
-			       nohz_full_kick_ipi, NULL, false);
+	for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
+		tick_nohz_full_kick_cpu(cpu);
preempt_enable();
}
static void tick_nohz_dep_set_all(unsigned long *dep,
enum tick_dep_bits bit)
{
unsigned long prev;
prev = fetch_or(dep, BIT_MASK(bit));
if (!prev)
tick_nohz_full_kick_all();
}
/*
* Set a global tick dependency. Used by perf events that rely on freq and
* by unstable clock.
*/
void tick_nohz_dep_set(enum tick_dep_bits bit)
{
tick_nohz_dep_set_all(&tick_dep_mask, bit);
}
void tick_nohz_dep_clear(enum tick_dep_bits bit)
{
clear_bit(bit, &tick_dep_mask);
}
/*
* Set per-CPU tick dependency. Used by scheduler and perf events in order to
* manage events throttling.
*/
void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
{
unsigned long prev;
struct tick_sched *ts;
ts = per_cpu_ptr(&tick_cpu_sched, cpu);
prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit));
if (!prev) {
preempt_disable();
/* Perf needs local kick that is NMI safe */
if (cpu == smp_processor_id()) {
tick_nohz_full_kick();
} else {
/* Remote irq work not NMI-safe */
if (!WARN_ON_ONCE(in_nmi()))
tick_nohz_full_kick_cpu(cpu);
}
preempt_enable();
}
}
void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
{
struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
clear_bit(bit, &ts->tick_dep_mask);
}
/*
* Set a per-task tick dependency. Posix CPU timers need this in order to elapse
* per task timers.
*/
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
/*
* We could optimize this with just kicking the target running the task
* if that noise matters for nohz full users.
*/
tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
}
void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
{
clear_bit(bit, &tsk->tick_dep_mask);
}
/*
* Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
* per process timers.
*/
void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
{
tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
}
void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
{
clear_bit(bit, &sig->tick_dep_mask);
}
/*
@@ -263,15 +356,19 @@ void tick_nohz_full_kick_all(void)
void __tick_nohz_task_switch(void)
{
unsigned long flags;
struct tick_sched *ts;
local_irq_save(flags);
if (!tick_nohz_full_cpu(smp_processor_id()))
goto out;
-	if (tick_nohz_tick_stopped() && !can_stop_full_tick())
-		tick_nohz_full_kick();
+	ts = this_cpu_ptr(&tick_cpu_sched);
+
+	if (ts->tick_stopped) {
+		if (current->tick_dep_mask || current->signal->tick_dep_mask)
+			tick_nohz_full_kick();
+	}
out:
local_irq_restore(flags);
}
@@ -689,7 +786,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
ts->tick_stopped = 1;
-		trace_tick_stop(1, " ");
+		trace_tick_stop(1, TICK_DEP_MASK_NONE);
}
/*
@@ -740,7 +837,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
return;
-	if (can_stop_full_tick())
+	if (can_stop_full_tick(ts))
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
@@ -60,6 +60,7 @@ struct tick_sched {
u64 next_timer;
ktime_t idle_expires;
int do_timer_last;
unsigned long tick_dep_mask;
};
extern struct tick_sched *tick_get_tick_sched(int cpu);
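
With the tick_dep_mask field above in place, can_stop_full_tick() reduces to
checking four masks in order: global, per-CPU, per-task and per-signal. A
condensed plain-C restatement of that logic (a sketch under stated
assumptions, not the kernel function itself):

/* The tick may be stopped only if no dependency mask at any scope is
 * set. Parameters stand in for the kernel's tick_dep_mask,
 * ts->tick_dep_mask, current->tick_dep_mask and
 * current->signal->tick_dep_mask respectively. */
static int tick_can_stop(unsigned long global_dep, unsigned long cpu_dep,
			 unsigned long task_dep, unsigned long signal_dep)
{
	return !(global_dep | cpu_dep | task_dep | signal_dep);
}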