Commit 19035e5b authored by Linus Torvalds

Merge branch 'timers-for-linus-migration' of...

Merge branch 'timers-for-linus-migration' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'timers-for-linus-migration' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  timers: Logic to move non pinned timers
  timers: /proc/sys sysctl hook to enable timer migration
  timers: Identifying the existing pinned timers
  timers: Framework for identifying pinned timers
  timers: allow deferrable timers for intervals tv2-tv5 to be deferred

Fix up conflicts in kernel/sched.c and kernel/timer.c manually
parents f9db6e09 eea08f32
...@@ -463,7 +463,7 @@ static void uv_heartbeat(unsigned long ignored) ...@@ -463,7 +463,7 @@ static void uv_heartbeat(unsigned long ignored)
uv_set_scir_bits(bits); uv_set_scir_bits(bits);
/* enable next timer period */ /* enable next timer period */
mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL); mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL);
} }
static void __cpuinit uv_heartbeat_enable(int cpu) static void __cpuinit uv_heartbeat_enable(int cpu)
......
...@@ -143,3 +143,12 @@ extern void clockevents_notify(unsigned long reason, void *arg); ...@@ -143,3 +143,12 @@ extern void clockevents_notify(unsigned long reason, void *arg);
#endif #endif
#endif #endif
/*
 * clockevents_get_next_event - next programmed clock event for @cpu.
 *
 * With CONFIG_GENERIC_CLOCKEVENTS the real implementation is provided
 * out of line. Without it, the stub below reports KTIME_MAX.
 */
#ifdef CONFIG_GENERIC_CLOCKEVENTS
extern ktime_t clockevents_get_next_event(int cpu);
#else
static inline ktime_t clockevents_get_next_event(int cpu)
{
/* No generic clockevents layer to query: report the maximum ktime value. */
return (ktime_t) { .tv64 = KTIME_MAX };
}
#endif
...@@ -30,8 +30,11 @@ struct hrtimer_cpu_base; ...@@ -30,8 +30,11 @@ struct hrtimer_cpu_base;
* Mode arguments of xxx_hrtimer functions: * Mode arguments of xxx_hrtimer functions:
*/ */
enum hrtimer_mode { enum hrtimer_mode {
HRTIMER_MODE_ABS, /* Time value is absolute */ HRTIMER_MODE_ABS = 0x0, /* Time value is absolute */
HRTIMER_MODE_REL, /* Time value is relative to now */ HRTIMER_MODE_REL = 0x1, /* Time value is relative to now */
HRTIMER_MODE_PINNED = 0x02, /* Timer is bound to CPU */
HRTIMER_MODE_ABS_PINNED = 0x02,
HRTIMER_MODE_REL_PINNED = 0x03,
}; };
/* /*
......
...@@ -261,6 +261,7 @@ extern void task_rq_unlock_wait(struct task_struct *p); ...@@ -261,6 +261,7 @@ extern void task_rq_unlock_wait(struct task_struct *p);
extern cpumask_var_t nohz_cpu_mask; extern cpumask_var_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
extern int select_nohz_load_balancer(int cpu); extern int select_nohz_load_balancer(int cpu);
extern int get_nohz_load_balancer(void);
#else #else
static inline int select_nohz_load_balancer(int cpu) static inline int select_nohz_load_balancer(int cpu)
{ {
...@@ -1796,11 +1797,23 @@ extern unsigned int sysctl_sched_child_runs_first; ...@@ -1796,11 +1797,23 @@ extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate; extern unsigned int sysctl_sched_nr_migrate;
extern unsigned int sysctl_timer_migration;
int sched_nr_latency_handler(struct ctl_table *table, int write, int sched_nr_latency_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer, size_t *length, struct file *file, void __user *buffer, size_t *length,
loff_t *ppos); loff_t *ppos);
#endif #endif
/*
 * get_sysctl_timer_migration - read the timer migration switch.
 *
 * With CONFIG_SCHED_DEBUG the current value of sysctl_timer_migration
 * is returned; otherwise the function always returns 1. Callers test
 * the result for non-zero before moving a non-pinned timer off an idle
 * CPU.
 */
#ifdef CONFIG_SCHED_DEBUG
static inline unsigned int get_sysctl_timer_migration(void)
{
return sysctl_timer_migration;
}
#else
static inline unsigned int get_sysctl_timer_migration(void)
{
/* No tunable in this configuration: fixed non-zero value. */
return 1;
}
#endif
extern unsigned int sysctl_sched_rt_period; extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime; extern int sysctl_sched_rt_runtime;
......
...@@ -163,7 +163,10 @@ extern void add_timer_on(struct timer_list *timer, int cpu); ...@@ -163,7 +163,10 @@ extern void add_timer_on(struct timer_list *timer, int cpu);
extern int del_timer(struct timer_list * timer); extern int del_timer(struct timer_list * timer);
extern int mod_timer(struct timer_list *timer, unsigned long expires); extern int mod_timer(struct timer_list *timer, unsigned long expires);
extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); extern int mod_timer_pending(struct timer_list *timer, unsigned long expires);
extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires);
#define TIMER_NOT_PINNED 0
#define TIMER_PINNED 1
/* /*
* The jiffies value which is added to now, when there is no timer * The jiffies value which is added to now, when there is no timer
* in the timer wheel: * in the timer wheel:
......
...@@ -43,6 +43,8 @@ ...@@ -43,6 +43,8 @@
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/err.h> #include <linux/err.h>
#include <linux/debugobjects.h> #include <linux/debugobjects.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
...@@ -193,12 +195,24 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, ...@@ -193,12 +195,24 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
* Switch the timer base to the current CPU when possible. * Switch the timer base to the current CPU when possible.
*/ */
static inline struct hrtimer_clock_base * static inline struct hrtimer_clock_base *
switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base) switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
int pinned)
{ {
struct hrtimer_clock_base *new_base; struct hrtimer_clock_base *new_base;
struct hrtimer_cpu_base *new_cpu_base; struct hrtimer_cpu_base *new_cpu_base;
int cpu, preferred_cpu = -1;
cpu = smp_processor_id();
#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
preferred_cpu = get_nohz_load_balancer();
if (preferred_cpu >= 0)
cpu = preferred_cpu;
}
#endif
new_cpu_base = &__get_cpu_var(hrtimer_bases); again:
new_cpu_base = &per_cpu(hrtimer_bases, cpu);
new_base = &new_cpu_base->clock_base[base->index]; new_base = &new_cpu_base->clock_base[base->index];
if (base != new_base) { if (base != new_base) {
...@@ -218,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base) ...@@ -218,6 +232,40 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
timer->base = NULL; timer->base = NULL;
spin_unlock(&base->cpu_base->lock); spin_unlock(&base->cpu_base->lock);
spin_lock(&new_base->cpu_base->lock); spin_lock(&new_base->cpu_base->lock);
/* Optimized away for NOHZ=n SMP=n */
if (cpu == preferred_cpu) {
/* Calculate clock monotonic expiry time */
#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
new_base->offset);
#else
ktime_t expires = hrtimer_get_expires(timer);
#endif
/*
* Get the next event on target cpu from the
* clock events layer.
* This covers the highres=off nohz=on case as well.
*/
ktime_t next = clockevents_get_next_event(cpu);
ktime_t delta = ktime_sub(expires, next);
/*
* We do not migrate the timer when it is expiring
* before the next event on the target cpu because
* we cannot reprogram the target cpu hardware and
* we would cause it to fire late.
*/
if (delta.tv64 < 0) {
cpu = smp_processor_id();
spin_unlock(&new_base->cpu_base->lock);
spin_lock(&base->cpu_base->lock);
timer->base = base;
goto again;
}
}
timer->base = new_base; timer->base = new_base;
} }
return new_base; return new_base;
...@@ -235,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) ...@@ -235,7 +283,7 @@ lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
return base; return base;
} }
# define switch_hrtimer_base(t, b) (b) # define switch_hrtimer_base(t, b, p) (b)
#endif /* !CONFIG_SMP */ #endif /* !CONFIG_SMP */
...@@ -907,9 +955,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, ...@@ -907,9 +955,9 @@ int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
ret = remove_hrtimer(timer, base); ret = remove_hrtimer(timer, base);
/* Switch the timer base, if necessary: */ /* Switch the timer base, if necessary: */
new_base = switch_hrtimer_base(timer, base); new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
if (mode == HRTIMER_MODE_REL) { if (mode & HRTIMER_MODE_REL) {
tim = ktime_add_safe(tim, new_base->get_time()); tim = ktime_add_safe(tim, new_base->get_time());
/* /*
* CONFIG_TIME_LOW_RES is a temporary way for architectures * CONFIG_TIME_LOW_RES is a temporary way for architectures
......
...@@ -240,7 +240,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) ...@@ -240,7 +240,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
hard = hrtimer_get_expires(&rt_b->rt_period_timer); hard = hrtimer_get_expires(&rt_b->rt_period_timer);
delta = ktime_to_ns(ktime_sub(hard, soft)); delta = ktime_to_ns(ktime_sub(hard, soft));
__hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
HRTIMER_MODE_ABS, 0); HRTIMER_MODE_ABS_PINNED, 0);
} }
spin_unlock(&rt_b->rt_runtime_lock); spin_unlock(&rt_b->rt_runtime_lock);
} }
...@@ -1155,7 +1155,7 @@ static __init void init_hrtick(void) ...@@ -1155,7 +1155,7 @@ static __init void init_hrtick(void)
static void hrtick_start(struct rq *rq, u64 delay) static void hrtick_start(struct rq *rq, u64 delay)
{ {
__hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0, __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
HRTIMER_MODE_REL, 0); HRTIMER_MODE_REL_PINNED, 0);
} }
static inline void init_hrtick(void) static inline void init_hrtick(void)
...@@ -4397,6 +4397,11 @@ static struct { ...@@ -4397,6 +4397,11 @@ static struct {
.load_balancer = ATOMIC_INIT(-1), .load_balancer = ATOMIC_INIT(-1),
}; };
/*
 * get_nohz_load_balancer - return the current value of nohz.load_balancer.
 *
 * The field is read atomically. It is initialised to -1, and the timer
 * code only uses the result as a target CPU when it is >= 0.
 */
int get_nohz_load_balancer(void)
{
return atomic_read(&nohz.load_balancer);
}
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
/** /**
* lowest_flag_domain - Return lowest sched_domain containing flag. * lowest_flag_domain - Return lowest sched_domain containing flag.
...@@ -9029,6 +9034,8 @@ void __init sched_init_smp(void) ...@@ -9029,6 +9034,8 @@ void __init sched_init_smp(void)
} }
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
const_debug unsigned int sysctl_timer_migration = 1;
int in_sched_functions(unsigned long addr) int in_sched_functions(unsigned long addr)
{ {
return in_lock_functions(addr) || return in_lock_functions(addr) ||
......
...@@ -328,6 +328,14 @@ static struct ctl_table kern_table[] = { ...@@ -328,6 +328,14 @@ static struct ctl_table kern_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec, .proc_handler = &proc_dointvec,
}, },
{
.ctl_name = CTL_UNNUMBERED,
.procname = "timer_migration",
.data = &sysctl_timer_migration,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#endif #endif
{ {
.ctl_name = CTL_UNNUMBERED, .ctl_name = CTL_UNNUMBERED,
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <linux/sysdev.h> #include <linux/sysdev.h>
#include <linux/tick.h>
/* The registered clock event devices */ /* The registered clock event devices */
static LIST_HEAD(clockevent_devices); static LIST_HEAD(clockevent_devices);
...@@ -253,4 +254,15 @@ void clockevents_notify(unsigned long reason, void *arg) ...@@ -253,4 +254,15 @@ void clockevents_notify(unsigned long reason, void *arg)
spin_unlock(&clockevents_lock); spin_unlock(&clockevents_lock);
} }
EXPORT_SYMBOL_GPL(clockevents_notify); EXPORT_SYMBOL_GPL(clockevents_notify);
/*
 * clockevents_get_next_event - return the next_event of @cpu's tick device.
 * @cpu: CPU whose per-cpu tick_cpu_device is inspected
 *
 * Looks up the per-cpu tick_cpu_device for @cpu and returns the
 * next_event field of the clock event device attached to it.
 *
 * NOTE(review): td->evtdev is dereferenced without a NULL check and no
 * lock is taken here -- confirm that callers only pass CPUs that have a
 * tick device installed.
 */
ktime_t clockevents_get_next_event(int cpu)
{
struct tick_device *td;
struct clock_event_device *dev;
td = &per_cpu(tick_cpu_device, cpu);
dev = td->evtdev;
return dev->next_event;
}
#endif #endif
...@@ -349,7 +349,7 @@ void tick_nohz_stop_sched_tick(int inidle) ...@@ -349,7 +349,7 @@ void tick_nohz_stop_sched_tick(int inidle)
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start(&ts->sched_timer, expires, hrtimer_start(&ts->sched_timer, expires,
HRTIMER_MODE_ABS); HRTIMER_MODE_ABS_PINNED);
/* Check, if the timer was already in the past */ /* Check, if the timer was already in the past */
if (hrtimer_active(&ts->sched_timer)) if (hrtimer_active(&ts->sched_timer))
goto out; goto out;
...@@ -395,7 +395,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now) ...@@ -395,7 +395,7 @@ static void tick_nohz_restart(struct tick_sched *ts, ktime_t now)
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start_expires(&ts->sched_timer, hrtimer_start_expires(&ts->sched_timer,
HRTIMER_MODE_ABS); HRTIMER_MODE_ABS_PINNED);
/* Check, if the timer was already in the past */ /* Check, if the timer was already in the past */
if (hrtimer_active(&ts->sched_timer)) if (hrtimer_active(&ts->sched_timer))
break; break;
...@@ -698,7 +698,8 @@ void tick_setup_sched_timer(void) ...@@ -698,7 +698,8 @@ void tick_setup_sched_timer(void)
for (;;) { for (;;) {
hrtimer_forward(&ts->sched_timer, now, tick_period); hrtimer_forward(&ts->sched_timer, now, tick_period);
hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS); hrtimer_start_expires(&ts->sched_timer,
HRTIMER_MODE_ABS_PINNED);
/* Check, if the timer was already in the past */ /* Check, if the timer was already in the past */
if (hrtimer_active(&ts->sched_timer)) if (hrtimer_active(&ts->sched_timer))
break; break;
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
#include <linux/tick.h> #include <linux/tick.h>
#include <linux/kallsyms.h> #include <linux/kallsyms.h>
#include <linux/perf_counter.h> #include <linux/perf_counter.h>
#include <linux/sched.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/unistd.h> #include <asm/unistd.h>
...@@ -605,13 +606,12 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer, ...@@ -605,13 +606,12 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
} }
static inline int static inline int
__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) __mod_timer(struct timer_list *timer, unsigned long expires,
bool pending_only, int pinned)
{ {
struct tvec_base *base, *new_base; struct tvec_base *base, *new_base;
unsigned long flags; unsigned long flags;
int ret; int ret = 0 , cpu;
ret = 0;
timer_stats_timer_set_start_info(timer); timer_stats_timer_set_start_info(timer);
BUG_ON(!timer->function); BUG_ON(!timer->function);
...@@ -630,6 +630,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) ...@@ -630,6 +630,18 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
new_base = __get_cpu_var(tvec_bases); new_base = __get_cpu_var(tvec_bases);
cpu = smp_processor_id();
#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
int preferred_cpu = get_nohz_load_balancer();
if (preferred_cpu >= 0)
cpu = preferred_cpu;
}
#endif
new_base = per_cpu(tvec_bases, cpu);
if (base != new_base) { if (base != new_base) {
/* /*
* We are trying to schedule the timer on the local CPU. * We are trying to schedule the timer on the local CPU.
...@@ -669,7 +681,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) ...@@ -669,7 +681,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
*/ */
int mod_timer_pending(struct timer_list *timer, unsigned long expires) int mod_timer_pending(struct timer_list *timer, unsigned long expires)
{ {
return __mod_timer(timer, expires, true); return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
} }
EXPORT_SYMBOL(mod_timer_pending); EXPORT_SYMBOL(mod_timer_pending);
...@@ -703,10 +715,32 @@ int mod_timer(struct timer_list *timer, unsigned long expires) ...@@ -703,10 +715,32 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
if (timer->expires == expires && timer_pending(timer)) if (timer->expires == expires && timer_pending(timer))
return 1; return 1;
return __mod_timer(timer, expires, false); return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
} }
EXPORT_SYMBOL(mod_timer); EXPORT_SYMBOL(mod_timer);
/**
* mod_timer_pinned - modify a timer's timeout
* @timer: the timer to be modified
* @expires: new timeout in jiffies
*
* mod_timer_pinned() is a way to update the expire field of an
* active timer (if the timer is inactive it will be activated)
* and not allow the timer to be migrated to a different CPU.
*
* mod_timer_pinned(timer, expires) is equivalent to:
*
* del_timer(timer); timer->expires = expires; add_timer(timer);
*
* except that the timer is queued with TIMER_PINNED, which makes
* __mod_timer() skip the idle-CPU migration path.
* NOTE(review): the "equivalent to" sequence above mirrors the
* mod_timer() description; confirm whether add_timer() gives the same
* CPU placement before relying on it.
*
* Returns 1 without touching the timer when it is already pending with
* the same @expires; otherwise returns the result of __mod_timer().
*/
int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
{
/* Already pending with this expiry: nothing to modify. */
if (timer->expires == expires && timer_pending(timer))
return 1;
return __mod_timer(timer, expires, false, TIMER_PINNED);
}
EXPORT_SYMBOL(mod_timer_pinned);
/** /**
* add_timer - start a timer * add_timer - start a timer
* @timer: the timer to be added * @timer: the timer to be added
...@@ -1017,6 +1051,9 @@ static unsigned long __next_timer_interrupt(struct tvec_base *base) ...@@ -1017,6 +1051,9 @@ static unsigned long __next_timer_interrupt(struct tvec_base *base)
index = slot = timer_jiffies & TVN_MASK; index = slot = timer_jiffies & TVN_MASK;
do { do {
list_for_each_entry(nte, varp->vec + slot, entry) { list_for_each_entry(nte, varp->vec + slot, entry) {
if (tbase_get_deferrable(nte->base))
continue;
found = 1; found = 1;
if (time_before(nte->expires, expires)) if (time_before(nte->expires, expires))
expires = nte->expires; expires = nte->expires;
...@@ -1307,7 +1344,7 @@ signed long __sched schedule_timeout(signed long timeout) ...@@ -1307,7 +1344,7 @@ signed long __sched schedule_timeout(signed long timeout)
expire = timeout + jiffies; expire = timeout + jiffies;
setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
__mod_timer(&timer, expire, false); __mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
schedule(); schedule();
del_singleshot_timer_sync(&timer); del_singleshot_timer_sync(&timer);
......
...@@ -203,7 +203,8 @@ static void start_stack_timer(void *unused) ...@@ -203,7 +203,8 @@ static void start_stack_timer(void *unused)
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = stack_trace_timer_fn; hrtimer->function = stack_trace_timer_fn;
hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL); hrtimer_start(hrtimer, ns_to_ktime(sample_period),
HRTIMER_MODE_REL_PINNED);
} }
static void start_stack_timers(void) static void start_stack_timers(void)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment