Commit 3070f27d authored by Linus Torvalds

Merge branch 'timers-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'timers-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  itimer: Fix the itimer trace print format
  hrtimer: move timer stats helper functions to hrtimer.c
  hrtimer: Tune hrtimer_interrupt hang logic
parents 1e57c218 e9c0748b
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -162,10 +162,11 @@ struct hrtimer_clock_base {
  * @expires_next: absolute time of the next event which was scheduled
  *                via clock_set_next_event()
  * @hres_active: State of high resolution mode
- * @check_clocks: Indictator, when set evaluate time source and clock
- *                event devices whether high resolution mode can be
- *                activated.
- * @nr_events: Total number of timer interrupt events
+ * @hang_detected: The last hrtimer interrupt detected a hang
+ * @nr_events: Total number of hrtimer interrupt events
+ * @nr_retries: Total number of hrtimer interrupt retries
+ * @nr_hangs: Total number of hrtimer interrupt hangs
+ * @max_hang_time: Maximum time spent in hrtimer_interrupt
  */
 struct hrtimer_cpu_base {
 	spinlock_t lock;
@@ -173,7 +174,11 @@ struct hrtimer_cpu_base {
 #ifdef CONFIG_HIGH_RES_TIMERS
 	ktime_t expires_next;
 	int hres_active;
+	int hang_detected;
 	unsigned long nr_events;
+	unsigned long nr_retries;
+	unsigned long nr_hangs;
+	ktime_t max_hang_time;
 #endif
 };
@@ -435,47 +440,4 @@ extern u64 ktime_divns(const ktime_t kt, s64 div);
 /* Show pending timers: */
 extern void sysrq_timer_list_show(void);
 
-/*
- * Timer-statistics info:
- */
-#ifdef CONFIG_TIMER_STATS
-
-extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
-				     void *timerf, char *comm,
-				     unsigned int timer_flag);
-
-static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
-{
-	if (likely(!timer_stats_active))
-		return;
-	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
-				 timer->function, timer->start_comm, 0);
-}
-
-extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer,
-						 void *addr);
-
-static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
-{
-	__timer_stats_hrtimer_set_start_info(timer, __builtin_return_address(0));
-}
-
-static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
-{
-	timer->start_site = NULL;
-}
-
-#else
-
-static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
-{
-}
-
-static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
-{
-}
-
-static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
-{
-}
-
-#endif
-
 #endif
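The header now keeps only the new bookkeeping fields; the timer-stats helpers deleted here reappear further down in kernel/hrtimer.c as single static inline functions with CONFIG_TIMER_STATS tested inside the body, so callers no longer need a separate stub variant. A minimal stand-alone sketch of that pattern follows; it is not kernel code, and the names STATS_ENABLED, fake_timer and timer_stats_set_start_info are invented for this illustration only.

/*
 * Not kernel code: one static inline with the #ifdef inside its body
 * instead of a second, empty stub definition under #else.
 */
#include <stdio.h>

struct fake_timer {
	const char *name;
	long start_pid;		/* stands in for timer->start_pid */
};

static inline void timer_stats_set_start_info(struct fake_timer *t, long pid)
{
#ifdef STATS_ENABLED
	if (t->start_pid)	/* already recorded, nothing to do */
		return;
	t->start_pid = pid;
	printf("recorded start pid %ld for %s\n", pid, t->name);
#else
	(void)t;		/* compiles away; callers stay #ifdef-free */
	(void)pid;
#endif
}

int main(void)
{
	struct fake_timer t = { .name = "demo", .start_pid = 0 };

	timer_stats_set_start_info(&t, 42);
	return 0;
}

Building this once with and once without -DSTATS_ENABLED exercises both configurations from the single definition, which is the point of folding the #ifdef into the function body.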
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -301,8 +301,8 @@ TRACE_EVENT(itimer_state,
 		__entry->interval_usec = value->it_interval.tv_usec;
 	),
 
-	TP_printk("which=%d expires=%lu it_value=%lu.%lu it_interval=%lu.%lu",
-		  __entry->which, __entry->expires,
+	TP_printk("which=%d expires=%llu it_value=%ld.%ld it_interval=%ld.%ld",
+		  __entry->which, (unsigned long long)__entry->expires,
 		  __entry->value_sec, __entry->value_usec,
 		  __entry->interval_sec, __entry->interval_usec)
 );
@@ -331,8 +331,8 @@ TRACE_EVENT(itimer_expire,
 		__entry->pid = pid_nr(pid);
 	),
 
-	TP_printk("which=%d pid=%d now=%lu", __entry->which,
-		  (int) __entry->pid, __entry->now)
+	TP_printk("which=%d pid=%d now=%llu", __entry->which,
+		  (int) __entry->pid, (unsigned long long)__entry->now)
 );
 
 #endif /* _TRACE_TIMER_H */
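The itimer trace fix above is purely a printf-style format/argument repair: the expires and now fields can be wider than unsigned long, so they are cast to unsigned long long and printed with %llu, while the long-typed seconds and microseconds fields take %ld. A minimal user-space sketch of the same portability rule, with invented values and not kernel code:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t expires = 123456789ULL;	/* plays the role of __entry->expires */
	long value_sec = 1, value_usec = 500000;

	/*
	 * "expires=%lu" would be wrong wherever unsigned long is 32-bit:
	 * the argument is 64-bit, so format and value disagree. Casting to
	 * unsigned long long and using %llu works everywhere, and the
	 * long fields take plain %ld.
	 */
	printf("expires=%llu it_value=%ld.%ld\n",
	       (unsigned long long)expires, value_sec, value_usec);
	return 0;
}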
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -557,7 +557,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
 static int hrtimer_reprogram(struct hrtimer *timer,
 			     struct hrtimer_clock_base *base)
 {
-	ktime_t *expires_next = &__get_cpu_var(hrtimer_bases).expires_next;
+	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
 	int res;
 
@@ -582,7 +582,16 @@ static int hrtimer_reprogram(struct hrtimer *timer,
 	if (expires.tv64 < 0)
 		return -ETIME;
 
-	if (expires.tv64 >= expires_next->tv64)
+	if (expires.tv64 >= cpu_base->expires_next.tv64)
+		return 0;
+
+	/*
+	 * If a hang was detected in the last timer interrupt then we
+	 * do not schedule a timer which is earlier than the expiry
+	 * which we enforced in the hang detection. We want the system
+	 * to make progress.
+	 */
+	if (cpu_base->hang_detected)
 		return 0;
 
 	/*
@@ -590,7 +599,7 @@ static int hrtimer_reprogram(struct hrtimer *timer,
 	 */
 	res = tick_program_event(expires, 0);
 	if (!IS_ERR_VALUE(res))
-		*expires_next = expires;
+		cpu_base->expires_next = expires;
 	return res;
 }
 
@@ -747,17 +756,33 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
 
 #endif /* CONFIG_HIGH_RES_TIMERS */
 
-#ifdef CONFIG_TIMER_STATS
-void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, void *addr)
+static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
 {
+#ifdef CONFIG_TIMER_STATS
 	if (timer->start_site)
 		return;
-	timer->start_site = addr;
+	timer->start_site = __builtin_return_address(0);
 	memcpy(timer->start_comm, current->comm, TASK_COMM_LEN);
 	timer->start_pid = current->pid;
+#endif
+}
+
+static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+	timer->start_site = NULL;
+#endif
 }
+
+static inline void timer_stats_account_hrtimer(struct hrtimer *timer)
+{
+#ifdef CONFIG_TIMER_STATS
+	if (likely(!timer_stats_active))
+		return;
+	timer_stats_update_stats(timer, timer->start_pid, timer->start_site,
+				 timer->function, timer->start_comm, 0);
 #endif
+}
 
 /*
  * Counterpart to lock_hrtimer_base above:
@@ -1217,30 +1242,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
 
 #ifdef CONFIG_HIGH_RES_TIMERS
 
-static int force_clock_reprogram;
-
-/*
- * After 5 iteration's attempts, we consider that hrtimer_interrupt()
- * is hanging, which could happen with something that slows the interrupt
- * such as the tracing. Then we force the clock reprogramming for each future
- * hrtimer interrupts to avoid infinite loops and use the min_delta_ns
- * threshold that we will overwrite.
- * The next tick event will be scheduled to 3 times we currently spend on
- * hrtimer_interrupt(). This gives a good compromise, the cpus will spend
- * 1/4 of their time to process the hrtimer interrupts. This is enough to
- * let it running without serious starvation.
- */
-static inline void
-hrtimer_interrupt_hanging(struct clock_event_device *dev,
-			  ktime_t try_time)
-{
-	force_clock_reprogram = 1;
-	dev->min_delta_ns = (unsigned long)try_time.tv64 * 3;
-	printk(KERN_WARNING "hrtimer: interrupt too slow, "
-	       "forcing clock min delta to %llu ns\n",
-	       (unsigned long long) dev->min_delta_ns);
-}
 /*
  * High resolution timer interrupt
  * Called with interrupts disabled
@@ -1249,21 +1250,15 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 {
 	struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);
 	struct hrtimer_clock_base *base;
-	ktime_t expires_next, now;
-	int nr_retries = 0;
-	int i;
+	ktime_t expires_next, now, entry_time, delta;
+	int i, retries = 0;
 
 	BUG_ON(!cpu_base->hres_active);
 	cpu_base->nr_events++;
 	dev->next_event.tv64 = KTIME_MAX;
 
-retry:
-	/* 5 retries is enough to notice a hang */
-	if (!(++nr_retries % 5))
-		hrtimer_interrupt_hanging(dev, ktime_sub(ktime_get(), now));
-
-	now = ktime_get();
-
+	entry_time = now = ktime_get();
+retry:
 	expires_next.tv64 = KTIME_MAX;
 
 	spin_lock(&cpu_base->lock);
@@ -1325,10 +1320,48 @@ void hrtimer_interrupt(struct clock_event_device *dev)
 	spin_unlock(&cpu_base->lock);
 
 	/* Reprogramming necessary ? */
-	if (expires_next.tv64 != KTIME_MAX) {
-		if (tick_program_event(expires_next, force_clock_reprogram))
-			goto retry;
+	if (expires_next.tv64 == KTIME_MAX ||
+	    !tick_program_event(expires_next, 0)) {
+		cpu_base->hang_detected = 0;
+		return;
 	}
+
+	/*
+	 * The next timer was already expired due to:
+	 * - tracing
+	 * - long lasting callbacks
+	 * - being scheduled away when running in a VM
+	 *
+	 * We need to prevent that we loop forever in the hrtimer
+	 * interrupt routine. We give it 3 attempts to avoid
+	 * overreacting on some spurious event.
+	 */
+	now = ktime_get();
+	cpu_base->nr_retries++;
+	if (++retries < 3)
+		goto retry;
+	/*
+	 * Give the system a chance to do something else than looping
+	 * here. We stored the entry time, so we know exactly how long
+	 * we spent here. We schedule the next event this amount of
+	 * time away.
+	 */
+	cpu_base->nr_hangs++;
+	cpu_base->hang_detected = 1;
+	delta = ktime_sub(now, entry_time);
+	if (delta.tv64 > cpu_base->max_hang_time.tv64)
+		cpu_base->max_hang_time = delta;
+	/*
+	 * Limit it to a sensible value as we enforce a longer
+	 * delay. Give the CPU at least 100ms to catch up.
+	 */
+	if (delta.tv64 > 100 * NSEC_PER_MSEC)
+		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
+	else
+		expires_next = ktime_add(now, delta);
+	tick_program_event(expires_next, 1);
+	printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
+		    ktime_to_ns(delta));
 }
 
 /*
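The comments in the hunk above already state the new policy: retry the expiry loop at most three times, then declare a hang, remember how long the interrupt ran, and defer the next event by that amount, capped at 100ms. A condensed user-space restatement of that control flow follows; it is not kernel code, and every identifier (cpu_base_stats, fake_now_ns, fake_program_event_ok, interrupt_like) is a stand-in invented for illustration.

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

#define NSEC_PER_MSEC 1000000LL

/* stand-ins for the fields added to struct hrtimer_cpu_base */
struct cpu_base_stats {
	int hang_detected;
	unsigned long nr_retries;
	unsigned long nr_hangs;
	int64_t max_hang_time_ns;
};

/* hypothetical helpers standing in for ktime_get()/tick_program_event() */
static int64_t fake_now_ns(void)
{
	static int64_t t;
	return t += 40 * NSEC_PER_MSEC;	/* each call is "40ms later" */
}

static bool fake_program_event_ok(void)
{
	return false;	/* pretend the next timer keeps ending up in the past */
}

static void interrupt_like(struct cpu_base_stats *s)
{
	int retries = 0;
	int64_t entry_time = fake_now_ns(), now, delta;

retry:
	/* ... run expired timers, compute the next expiry ... */
	if (fake_program_event_ok()) {
		s->hang_detected = 0;	/* made progress: clear the hang flag */
		return;
	}

	/* the next timer already expired again: allow only a few retries */
	now = fake_now_ns();
	s->nr_retries++;
	if (++retries < 3)
		goto retry;

	/* declare a hang and defer the next event by the time spent here */
	s->nr_hangs++;
	s->hang_detected = 1;
	delta = now - entry_time;
	if (delta > s->max_hang_time_ns)
		s->max_hang_time_ns = delta;
	if (delta > 100 * NSEC_PER_MSEC)	/* cap the enforced delay at 100ms */
		delta = 100 * NSEC_PER_MSEC;
	printf("hang: interrupt took %lld ns, next event deferred by %lld ns\n",
	       (long long)(now - entry_time), (long long)delta);
}

int main(void)
{
	struct cpu_base_stats s = { 0 };

	interrupt_like(&s);
	printf("nr_retries=%lu nr_hangs=%lu max_hang_time=%lld ns\n",
	       s.nr_retries, s.nr_hangs, (long long)s.max_hang_time_ns);
	return 0;
}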
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -150,6 +150,9 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
 	P_ns(expires_next);
 	P(hres_active);
 	P(nr_events);
+	P(nr_retries);
+	P(nr_hangs);
+	P_ns(max_hang_time);
 #endif
 #undef P
 #undef P_ns
@@ -254,7 +257,7 @@ static int timer_list_show(struct seq_file *m, void *v)
 	u64 now = ktime_to_ns(ktime_get());
 	int cpu;
 
-	SEQ_printf(m, "Timer List Version: v0.4\n");
+	SEQ_printf(m, "Timer List Version: v0.5\n");
 	SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
 	SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);