Commit 164d44fd authored by Linus Torvalds

Merge branch 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  clocksource: Add clocksource_register_hz/khz interface
  posix-cpu-timers: Optimize run_posix_cpu_timers()
  time: Remove xtime_cache
  mqueue: Convert message queue timeout to use hrtimers
  hrtimers: Provide schedule_hrtimeout for CLOCK_REALTIME
  timers: Introduce the concept of timer slack for legacy timers
  ntp: Remove tickadj
  ntp: Make time_adjust static
  time: Add xtime, wall_to_monotonic to feature-removal-schedule
  timer: Try to survive timer callback preempt_count leak
  timer: Split out timer function call
  timer: Print function name for timer callbacks modifying preemption count
  time: Clean up warp_clock()
  cpu-timers: Avoid iterating over all threads in fastpath_timer_check()
  cpu-timers: Change SIGEV_NONE timer implementation
  cpu-timers: Return correct previous timer reload value
  cpu-timers: Cleanup arm_timer()
  cpu-timers: Simplify RLIMIT_CPU handling
parents 5bfec46b d7e81c26
...@@ -541,6 +541,16 @@ Who: Avi Kivity <avi@redhat.com> ...@@ -541,6 +541,16 @@ Who: Avi Kivity <avi@redhat.com>
---------------------------- ----------------------------
What: xtime, wall_to_monotonic
When: 2.6.36+
Files: kernel/time/timekeeping.c include/linux/time.h
Why: Cleaning up timekeeping internal values. Please use
existing timekeeping accessor functions to access
the equivalent functionality.
Who: John Stultz <johnstul@us.ibm.com>
----------------------------
What: KVM kernel-allocated memory slots What: KVM kernel-allocated memory slots
When: July 2010 When: July 2010
Why: Since 2.6.25, kvm supports user-allocated memory slots, which are Why: Since 2.6.25, kvm supports user-allocated memory slots, which are
......
...@@ -273,7 +273,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift) ...@@ -273,7 +273,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
} }
/* used to install a new clocksource */
extern int clocksource_register(struct clocksource*); extern int clocksource_register(struct clocksource*);
extern void clocksource_unregister(struct clocksource*); extern void clocksource_unregister(struct clocksource*);
extern void clocksource_touch_watchdog(void); extern void clocksource_touch_watchdog(void);
...@@ -287,6 +286,24 @@ extern void clocksource_mark_unstable(struct clocksource *cs); ...@@ -287,6 +286,24 @@ extern void clocksource_mark_unstable(struct clocksource *cs);
extern void extern void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec); clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
/*
 * Don't call __clocksource_register_scale directly, use
 * clocksource_register_hz/khz
 *
 * Those wrappers pass @scale = 1 together with a frequency in Hz, or
 * @scale = 1000 together with a frequency in kHz.
 */
extern int
__clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq);
/*
 * Register clocksource @cs whose frequency @hz is expressed in Hz.
 *
 * Forwards to __clocksource_register_scale() with a scale factor of 1
 * and hands back that function's return value untouched.
 */
static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
{
	const u32 scale = 1;	/* @hz needs no scaling */

	return __clocksource_register_scale(cs, scale, hz);
}
/*
 * Register clocksource @cs whose frequency @khz is expressed in kHz.
 *
 * Forwards to __clocksource_register_scale() with a scale factor of
 * 1000 and hands back that function's return value untouched.
 */
static inline int clocksource_register_khz(struct clocksource *cs, u32 khz)
{
	const u32 scale = 1000;	/* @khz is in units of 1000 Hz */

	return __clocksource_register_scale(cs, scale, khz);
}
static inline void static inline void
clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec) clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec)
{ {
......
...@@ -422,6 +422,8 @@ extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, ...@@ -422,6 +422,8 @@ extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
const enum hrtimer_mode mode); const enum hrtimer_mode mode);
extern int schedule_hrtimeout_range_clock(ktime_t *expires,
unsigned long delta, const enum hrtimer_mode mode, int clock);
extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode);
/* Soft interrupt function to run the hrtimer queues: */ /* Soft interrupt function to run the hrtimer queues: */
......
...@@ -150,7 +150,6 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran); ...@@ -150,7 +150,6 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
extern int timekeeping_valid_for_hres(void); extern int timekeeping_valid_for_hres(void);
extern u64 timekeeping_max_deferment(void); extern u64 timekeeping_max_deferment(void);
extern void update_wall_time(void); extern void update_wall_time(void);
extern void update_xtime_cache(u64 nsec);
extern void timekeeping_leap_insert(int leapsecond); extern void timekeeping_leap_insert(int leapsecond);
struct tms; struct tms;
......
...@@ -10,13 +10,19 @@ ...@@ -10,13 +10,19 @@
struct tvec_base; struct tvec_base;
struct timer_list { struct timer_list {
/*
* All fields that change during normal runtime are grouped in the
* same cacheline
*/
struct list_head entry; struct list_head entry;
unsigned long expires; unsigned long expires;
struct tvec_base *base;
void (*function)(unsigned long); void (*function)(unsigned long);
unsigned long data; unsigned long data;
struct tvec_base *base; int slack;
#ifdef CONFIG_TIMER_STATS #ifdef CONFIG_TIMER_STATS
void *start_site; void *start_site;
char start_comm[16]; char start_comm[16];
...@@ -165,6 +171,8 @@ extern int mod_timer(struct timer_list *timer, unsigned long expires); ...@@ -165,6 +171,8 @@ extern int mod_timer(struct timer_list *timer, unsigned long expires);
extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); extern int mod_timer_pending(struct timer_list *timer, unsigned long expires);
extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires);
/*
 * Set the slack of @timer.
 * NOTE(review): judging by its name, @slack_hz is presumably expressed in
 * jiffies - confirm against the definition of set_timer_slack().
 */
extern void set_timer_slack(struct timer_list *timer, int slack_hz);
#define TIMER_NOT_PINNED 0 #define TIMER_NOT_PINNED 0
#define TIMER_PINNED 1 #define TIMER_PINNED 1
/* /*
......
...@@ -232,13 +232,11 @@ struct timex { ...@@ -232,13 +232,11 @@ struct timex {
*/ */
extern unsigned long tick_usec; /* USER_HZ period (usec) */ extern unsigned long tick_usec; /* USER_HZ period (usec) */
extern unsigned long tick_nsec; /* ACTHZ period (nsec) */ extern unsigned long tick_nsec; /* ACTHZ period (nsec) */
extern int tickadj; /* amount of adjustment per tick */
/* /*
* phase-lock loop variables * phase-lock loop variables
*/ */
extern int time_status; /* clock synchronization status bits */ extern int time_status; /* clock synchronization status bits */
extern long time_adjust; /* The amount of adjtime left */
extern void ntp_init(void); extern void ntp_init(void);
extern void ntp_clear(void); extern void ntp_clear(void);
...@@ -271,9 +269,6 @@ extern void second_overflow(void); ...@@ -271,9 +269,6 @@ extern void second_overflow(void);
extern void update_ntp_one_tick(void); extern void update_ntp_one_tick(void);
extern int do_adjtimex(struct timex *); extern int do_adjtimex(struct timex *);
/* Don't use! Compatibility define for existing users. */
#define tickadj (500/HZ ? : 1)
int read_current_timer(unsigned long *timer_val); int read_current_timer(unsigned long *timer_val);
/* The clock frequency of the i8253/i8254 PIT */ /* The clock frequency of the i8253/i8254 PIT */
......
...@@ -429,7 +429,7 @@ static void wq_add(struct mqueue_inode_info *info, int sr, ...@@ -429,7 +429,7 @@ static void wq_add(struct mqueue_inode_info *info, int sr,
* sr: SEND or RECV * sr: SEND or RECV
*/ */
static int wq_sleep(struct mqueue_inode_info *info, int sr, static int wq_sleep(struct mqueue_inode_info *info, int sr,
long timeout, struct ext_wait_queue *ewp) ktime_t *timeout, struct ext_wait_queue *ewp)
{ {
int retval; int retval;
signed long time; signed long time;
...@@ -440,7 +440,8 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr, ...@@ -440,7 +440,8 @@ static int wq_sleep(struct mqueue_inode_info *info, int sr,
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
spin_unlock(&info->lock); spin_unlock(&info->lock);
time = schedule_timeout(timeout); time = schedule_hrtimeout_range_clock(timeout,
HRTIMER_MODE_ABS, 0, CLOCK_REALTIME);
while (ewp->state == STATE_PENDING) while (ewp->state == STATE_PENDING)
cpu_relax(); cpu_relax();
...@@ -552,31 +553,16 @@ static void __do_notify(struct mqueue_inode_info *info) ...@@ -552,31 +553,16 @@ static void __do_notify(struct mqueue_inode_info *info)
wake_up(&info->wait_q); wake_up(&info->wait_q);
} }
static long prepare_timeout(struct timespec *p) static int prepare_timeout(const struct timespec __user *u_abs_timeout,
ktime_t *expires, struct timespec *ts)
{ {
struct timespec nowts; if (copy_from_user(ts, u_abs_timeout, sizeof(struct timespec)))
long timeout; return -EFAULT;
if (!timespec_valid(ts))
if (p) { return -EINVAL;
if (unlikely(p->tv_nsec < 0 || p->tv_sec < 0
|| p->tv_nsec >= NSEC_PER_SEC))
return -EINVAL;
nowts = CURRENT_TIME;
/* first subtract as jiffies can't be too big */
p->tv_sec -= nowts.tv_sec;
if (p->tv_nsec < nowts.tv_nsec) {
p->tv_nsec += NSEC_PER_SEC;
p->tv_sec--;
}
p->tv_nsec -= nowts.tv_nsec;
if (p->tv_sec < 0)
return 0;
timeout = timespec_to_jiffies(p) + 1;
} else
return MAX_SCHEDULE_TIMEOUT;
return timeout; *expires = timespec_to_ktime(*ts);
return 0;
} }
static void remove_notification(struct mqueue_inode_info *info) static void remove_notification(struct mqueue_inode_info *info)
...@@ -862,22 +848,21 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, ...@@ -862,22 +848,21 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
struct ext_wait_queue *receiver; struct ext_wait_queue *receiver;
struct msg_msg *msg_ptr; struct msg_msg *msg_ptr;
struct mqueue_inode_info *info; struct mqueue_inode_info *info;
struct timespec ts, *p = NULL; ktime_t expires, *timeout = NULL;
long timeout; struct timespec ts;
int ret; int ret;
if (u_abs_timeout) { if (u_abs_timeout) {
if (copy_from_user(&ts, u_abs_timeout, int res = prepare_timeout(u_abs_timeout, &expires, &ts);
sizeof(struct timespec))) if (res)
return -EFAULT; return res;
p = &ts; timeout = &expires;
} }
if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX)) if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
return -EINVAL; return -EINVAL;
audit_mq_sendrecv(mqdes, msg_len, msg_prio, p); audit_mq_sendrecv(mqdes, msg_len, msg_prio, timeout ? &ts : NULL);
timeout = prepare_timeout(p);
filp = fget(mqdes); filp = fget(mqdes);
if (unlikely(!filp)) { if (unlikely(!filp)) {
...@@ -919,9 +904,6 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, ...@@ -919,9 +904,6 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
if (filp->f_flags & O_NONBLOCK) { if (filp->f_flags & O_NONBLOCK) {
spin_unlock(&info->lock); spin_unlock(&info->lock);
ret = -EAGAIN; ret = -EAGAIN;
} else if (unlikely(timeout < 0)) {
spin_unlock(&info->lock);
ret = timeout;
} else { } else {
wait.task = current; wait.task = current;
wait.msg = (void *) msg_ptr; wait.msg = (void *) msg_ptr;
...@@ -954,24 +936,23 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, ...@@ -954,24 +936,23 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
size_t, msg_len, unsigned int __user *, u_msg_prio, size_t, msg_len, unsigned int __user *, u_msg_prio,
const struct timespec __user *, u_abs_timeout) const struct timespec __user *, u_abs_timeout)
{ {
long timeout;
ssize_t ret; ssize_t ret;
struct msg_msg *msg_ptr; struct msg_msg *msg_ptr;
struct file *filp; struct file *filp;
struct inode *inode; struct inode *inode;
struct mqueue_inode_info *info; struct mqueue_inode_info *info;
struct ext_wait_queue wait; struct ext_wait_queue wait;
struct timespec ts, *p = NULL; ktime_t expires, *timeout = NULL;
struct timespec ts;
if (u_abs_timeout) { if (u_abs_timeout) {
if (copy_from_user(&ts, u_abs_timeout, int res = prepare_timeout(u_abs_timeout, &expires, &ts);
sizeof(struct timespec))) if (res)
return -EFAULT; return res;
p = &ts; timeout = &expires;
} }
audit_mq_sendrecv(mqdes, msg_len, 0, p); audit_mq_sendrecv(mqdes, msg_len, 0, timeout ? &ts : NULL);
timeout = prepare_timeout(p);
filp = fget(mqdes); filp = fget(mqdes);
if (unlikely(!filp)) { if (unlikely(!filp)) {
...@@ -1003,11 +984,6 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, ...@@ -1003,11 +984,6 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
if (filp->f_flags & O_NONBLOCK) { if (filp->f_flags & O_NONBLOCK) {
spin_unlock(&info->lock); spin_unlock(&info->lock);
ret = -EAGAIN; ret = -EAGAIN;
msg_ptr = NULL;
} else if (unlikely(timeout < 0)) {
spin_unlock(&info->lock);
ret = timeout;
msg_ptr = NULL;
} else { } else {
wait.task = current; wait.task = current;
wait.state = STATE_NONE; wait.state = STATE_NONE;
......
...@@ -1749,35 +1749,15 @@ void __init hrtimers_init(void) ...@@ -1749,35 +1749,15 @@ void __init hrtimers_init(void)
} }
/** /**
* schedule_hrtimeout_range - sleep until timeout * schedule_hrtimeout_range_clock - sleep until timeout
* @expires: timeout value (ktime_t) * @expires: timeout value (ktime_t)
* @delta: slack in expires timeout (ktime_t) * @delta: slack in expires timeout (ktime_t)
* @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL * @mode: timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
* * @clock: timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
* Make the current task sleep until the given expiry time has
* elapsed. The routine will return immediately unless
* the current task state has been set (see set_current_state()).
*
* The @delta argument gives the kernel the freedom to schedule the
* actual wakeup to a time that is both power and performance friendly.
* The kernel give the normal best effort behavior for "@expires+@delta",
* but may decide to fire the timer earlier, but no earlier than @expires.
*
* You can set the task state as follows -
*
* %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
* pass before the routine returns.
*
* %TASK_INTERRUPTIBLE - the routine may return early if a signal is
* delivered to the current task.
*
* The current task state is guaranteed to be TASK_RUNNING when this
* routine returns.
*
* Returns 0 when the timer has expired otherwise -EINTR
*/ */
int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, int __sched
const enum hrtimer_mode mode) schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
const enum hrtimer_mode mode, int clock)
{ {
struct hrtimer_sleeper t; struct hrtimer_sleeper t;
...@@ -1799,7 +1779,7 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, ...@@ -1799,7 +1779,7 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
return -EINTR; return -EINTR;
} }
hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode); hrtimer_init_on_stack(&t.timer, clock, mode);
hrtimer_set_expires_range_ns(&t.timer, *expires, delta); hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
hrtimer_init_sleeper(&t, current); hrtimer_init_sleeper(&t, current);
...@@ -1818,6 +1798,41 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta, ...@@ -1818,6 +1798,41 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
return !t.task ? 0 : -EINTR; return !t.task ? 0 : -EINTR;
} }
/**
 * schedule_hrtimeout_range - sleep until timeout
 * @expires:	timeout value (ktime_t)
 * @delta:	slack in expires timeout (unsigned long)
 * @mode:	timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
 *
 * Put the current task to sleep until the given expiry time has
 * elapsed, measured against CLOCK_MONOTONIC. The routine returns
 * immediately unless the current task state has been set beforehand
 * (see set_current_state()).
 *
 * @delta gives the kernel the freedom to schedule the actual wakeup at
 * a time that is both power and performance friendly. The kernel gives
 * the normal best effort behavior for "@expires+@delta"; it may fire
 * the timer earlier than that, but never earlier than @expires.
 *
 * The task state chosen by the caller selects the behavior:
 *
 * %TASK_UNINTERRUPTIBLE - at least the time given by @expires is
 * guaranteed to pass before the routine returns.
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task.
 *
 * On return the current task state is guaranteed to be TASK_RUNNING.
 *
 * Returns 0 when the timer has expired otherwise -EINTR
 */
int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
				     const enum hrtimer_mode mode)
{
	/* Thin wrapper: same sleep, with the clock fixed to CLOCK_MONOTONIC. */
	const int clock_id = CLOCK_MONOTONIC;

	return schedule_hrtimeout_range_clock(expires, delta, mode, clock_id);
}
EXPORT_SYMBOL_GPL(schedule_hrtimeout_range); EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
/** /**
......
...@@ -11,19 +11,18 @@ ...@@ -11,19 +11,18 @@
#include <trace/events/timer.h> #include <trace/events/timer.h>
/* /*
* Called after updating RLIMIT_CPU to set timer expiration if necessary. * Called after updating RLIMIT_CPU to run cpu timer and update
* tsk->signal->cputime_expires expiration cache if necessary. Needs
* siglock protection since other code may update expiration cache as
* well.
*/ */
void update_rlimit_cpu(unsigned long rlim_new) void update_rlimit_cpu(unsigned long rlim_new)
{ {
cputime_t cputime = secs_to_cputime(rlim_new); cputime_t cputime = secs_to_cputime(rlim_new);
struct signal_struct *const sig = current->signal;
if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) || spin_lock_irq(&current->sighand->siglock);
cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) { set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
spin_lock_irq(&current->sighand->siglock); spin_unlock_irq(&current->sighand->siglock);
set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
spin_unlock_irq(&current->sighand->siglock);
}
} }
static int check_clock(const clockid_t which_clock) static int check_clock(const clockid_t which_clock)
...@@ -548,111 +547,62 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp) ...@@ -548,111 +547,62 @@ static inline int expires_gt(cputime_t expires, cputime_t new_exp)
cputime_gt(expires, new_exp); cputime_gt(expires, new_exp);
} }
/*
 * Returns 1 when @expires is nonzero and is less than or equal to
 * @new_exp, 0 otherwise. A zero @expires never compares as
 * less-or-equal.
 */
static inline int expires_le(cputime_t expires, cputime_t new_exp)
{
	if (cputime_eq(expires, cputime_zero))
		return 0;

	return cputime_le(expires, new_exp) ? 1 : 0;
}
/* /*
* Insert the timer on the appropriate list before any timers that * Insert the timer on the appropriate list before any timers that
* expire later. This must be called with the tasklist_lock held * expire later. This must be called with the tasklist_lock held
* for reading, and interrupts disabled. * for reading, interrupts disabled and p->sighand->siglock taken.
*/ */
static void arm_timer(struct k_itimer *timer, union cpu_time_count now) static void arm_timer(struct k_itimer *timer)
{ {
struct task_struct *p = timer->it.cpu.task; struct task_struct *p = timer->it.cpu.task;
struct list_head *head, *listpos; struct list_head *head, *listpos;
struct task_cputime *cputime_expires;
struct cpu_timer_list *const nt = &timer->it.cpu; struct cpu_timer_list *const nt = &timer->it.cpu;
struct cpu_timer_list *next; struct cpu_timer_list *next;
unsigned long i;
head = (CPUCLOCK_PERTHREAD(timer->it_clock) ? if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
p->cpu_timers : p->signal->cpu_timers); head = p->cpu_timers;
cputime_expires = &p->cputime_expires;
} else {
head = p->signal->cpu_timers;
cputime_expires = &p->signal->cputime_expires;
}
head += CPUCLOCK_WHICH(timer->it_clock); head += CPUCLOCK_WHICH(timer->it_clock);
BUG_ON(!irqs_disabled());
spin_lock(&p->sighand->siglock);
listpos = head; listpos = head;
if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) { list_for_each_entry(next, head, entry) {
list_for_each_entry(next, head, entry) { if (cpu_time_before(timer->it_clock, nt->expires, next->expires))
if (next->expires.sched > nt->expires.sched) break;
break; listpos = &next->entry;
listpos = &next->entry;
}
} else {
list_for_each_entry(next, head, entry) {
if (cputime_gt(next->expires.cpu, nt->expires.cpu))
break;
listpos = &next->entry;
}
} }
list_add(&nt->entry, listpos); list_add(&nt->entry, listpos);
if (listpos == head) { if (listpos == head) {
union cpu_time_count *exp = &nt->expires;
/* /*
* We are the new earliest-expiring timer. * We are the new earliest-expiring POSIX 1.b timer, hence
* If we are a thread timer, there can always * need to update expiration cache. Take into account that
* be a process timer telling us to stop earlier. * for process timers we share expiration cache with itimers
* and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
*/ */
if (CPUCLOCK_PERTHREAD(timer->it_clock)) { switch (CPUCLOCK_WHICH(timer->it_clock)) {
union cpu_time_count *exp = &nt->expires; case CPUCLOCK_PROF:
if (expires_gt(cputime_expires->prof_exp, exp->cpu))
switch (CPUCLOCK_WHICH(timer->it_clock)) { cputime_expires->prof_exp = exp->cpu;
default: break;
BUG(); case CPUCLOCK_VIRT:
case CPUCLOCK_PROF: if (expires_gt(cputime_expires->virt_exp, exp->cpu))
if (expires_gt(p->cputime_expires.prof_exp, cputime_expires->virt_exp = exp->cpu;
exp->cpu)) break;
p->cputime_expires.prof_exp = exp->cpu; case CPUCLOCK_SCHED:
break; if (cputime_expires->sched_exp == 0 ||
case CPUCLOCK_VIRT: cputime_expires->sched_exp > exp->sched)
if (expires_gt(p->cputime_expires.virt_exp, cputime_expires->sched_exp = exp->sched;
exp->cpu)) break;
p->cputime_expires.virt_exp = exp->cpu;
break;
case CPUCLOCK_SCHED:
if (p->cputime_expires.sched_exp == 0 ||
p->cputime_expires.sched_exp > exp->sched)
p->cputime_expires.sched_exp =
exp->sched;
break;
}
} else {
struct signal_struct *const sig = p->signal;
union cpu_time_count *exp = &timer->it.cpu.expires;
/*
* For a process timer, set the cached expiration time.
*/
switch (CPUCLOCK_WHICH(timer->it_clock)) {
default:
BUG();
case CPUCLOCK_VIRT:
if (expires_le(sig->it[CPUCLOCK_VIRT].expires,
exp->cpu))
break;
sig->cputime_expires.virt_exp = exp->cpu;
break;
case CPUCLOCK_PROF:
if (expires_le(sig->it[CPUCLOCK_PROF].expires,
exp->cpu))
break;
i = sig->rlim[RLIMIT_CPU].rlim_cur;
if (i != RLIM_INFINITY &&
i <= cputime_to_secs(exp->cpu))
break;
sig->cputime_expires.prof_exp = exp->cpu;
break;
case CPUCLOCK_SCHED:
sig->cputime_expires.sched_exp = exp->sched;
break;
}
} }
} }
spin_unlock(&p->sighand->siglock);
} }
/* /*
...@@ -660,7 +610,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) ...@@ -660,7 +610,12 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
*/ */
static void cpu_timer_fire(struct k_itimer *timer) static void cpu_timer_fire(struct k_itimer *timer)
{ {
if (unlikely(timer->sigq == NULL)) { if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
/*
* The user doesn't want any signal.
*/
timer->it.cpu.expires.sched = 0;
} else if (unlikely(timer->sigq == NULL)) {
/* /*
* This a special case for clock_nanosleep, * This a special case for clock_nanosleep,
* not a normal timer from sys_timer_create. * not a normal timer from sys_timer_create.
...@@ -721,7 +676,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, ...@@ -721,7 +676,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
struct itimerspec *new, struct itimerspec *old) struct itimerspec *new, struct itimerspec *old)
{ {
struct task_struct *p = timer->it.cpu.task; struct task_struct *p = timer->it.cpu.task;
union cpu_time_count old_expires, new_expires, val; union cpu_time_count old_expires, new_expires, old_incr, val;
int ret; int ret;
if (unlikely(p == NULL)) { if (unlikely(p == NULL)) {
...@@ -752,6 +707,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, ...@@ -752,6 +707,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
BUG_ON(!irqs_disabled()); BUG_ON(!irqs_disabled());
ret = 0; ret = 0;
old_incr = timer->it.cpu.incr;
spin_lock(&p->sighand->siglock); spin_lock(&p->sighand->siglock);
old_expires = timer->it.cpu.expires; old_expires = timer->it.cpu.expires;
if (unlikely(timer->it.cpu.firing)) { if (unlikely(timer->it.cpu.firing)) {
...@@ -759,7 +715,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, ...@@ -759,7 +715,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
ret = TIMER_RETRY; ret = TIMER_RETRY;
} else } else
list_del_init(&timer->it.cpu.entry); list_del_init(&timer->it.cpu.entry);
spin_unlock(&p->sighand->siglock);
/* /*
* We need to sample the current value to convert the new * We need to sample the current value to convert the new
...@@ -813,6 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, ...@@ -813,6 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
* disable this firing since we are already reporting * disable this firing since we are already reporting
* it as an overrun (thanks to bump_cpu_timer above). * it as an overrun (thanks to bump_cpu_timer above).
*/ */
spin_unlock(&p->sighand->siglock);
read_unlock(&tasklist_lock); read_unlock(&tasklist_lock);
goto out; goto out;
} }
...@@ -828,11 +784,11 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, ...@@ -828,11 +784,11 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
*/ */
timer->it.cpu.expires = new_expires; timer->it.cpu.expires = new_expires;
if (new_expires.sched != 0 && if (new_expires.sched != 0 &&
(timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
cpu_time_before(timer->it_clock, val, new_expires)) { cpu_time_before(timer->it_clock, val, new_expires)) {
arm_timer(timer, val); arm_timer(timer);
} }
spin_unlock(&p->sighand->siglock);
read_unlock(&tasklist_lock); read_unlock(&tasklist_lock);
/* /*
...@@ -853,7 +809,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, ...@@ -853,7 +809,6 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
timer->it_overrun = -1; timer->it_overrun = -1;
if (new_expires.sched != 0 && if (new_expires.sched != 0 &&
(timer->it_sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE &&
!cpu_time_before(timer->it_clock, val, new_expires)) { !cpu_time_before(timer->it_clock, val, new_expires)) {
/* /*
* The designated time already passed, so we notify * The designated time already passed, so we notify
...@@ -867,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, ...@@ -867,7 +822,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags,
out: out:
if (old) { if (old) {
sample_to_timespec(timer->it_clock, sample_to_timespec(timer->it_clock,
timer->it.cpu.incr, &old->it_interval); old_incr, &old->it_interval);
} }
return ret; return ret;
} }
...@@ -927,25 +882,6 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) ...@@ -927,25 +882,6 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
read_unlock(&tasklist_lock); read_unlock(&tasklist_lock);
} }
if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
if (timer->it.cpu.incr.sched == 0 &&
cpu_time_before(timer->it_clock,
timer->it.cpu.expires, now)) {
/*
* Do-nothing timer expired and has no reload,
* so it's as if it was never set.
*/
timer->it.cpu.expires.sched = 0;
itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
return;
}
/*
* Account for any expirations and reloads that should
* have happened.
*/
bump_cpu_timer(timer, now);
}
if (unlikely(clear_dead)) { if (unlikely(clear_dead)) {
/* /*
* We've noticed that the thread is dead, but * We've noticed that the thread is dead, but
...@@ -1066,16 +1002,9 @@ static void stop_process_timers(struct signal_struct *sig) ...@@ -1066,16 +1002,9 @@ static void stop_process_timers(struct signal_struct *sig)
struct thread_group_cputimer *cputimer = &sig->cputimer; struct thread_group_cputimer *cputimer = &sig->cputimer;
unsigned long flags; unsigned long flags;
if (!cputimer->running)
return;
spin_lock_irqsave(&cputimer->lock, flags); spin_lock_irqsave(&cputimer->lock, flags);
cputimer->running = 0; cputimer->running = 0;
spin_unlock_irqrestore(&cputimer->lock, flags); spin_unlock_irqrestore(&cputimer->lock, flags);
sig->cputime_expires.prof_exp = cputime_zero;
sig->cputime_expires.virt_exp = cputime_zero;
sig->cputime_expires.sched_exp = 0;
} }
static u32 onecputick; static u32 onecputick;
...@@ -1112,6 +1041,23 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, ...@@ -1112,6 +1041,23 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
} }
} }
/**
 * task_cputime_zero - Test whether every field of a task_cputime is zero.
 *
 * @cputime:	The struct to inspect.
 *
 * Returns 1 when utime, stime and sum_exec_runtime are all zero, and 0
 * as soon as any one of them is nonzero.
 */
static inline int task_cputime_zero(const struct task_cputime *cputime)
{
	return cputime_eq(cputime->utime, cputime_zero) &&
	       cputime_eq(cputime->stime, cputime_zero) &&
	       cputime->sum_exec_runtime == 0;
}
/* /*
* Check for any per-thread CPU timers that have fired and move them * Check for any per-thread CPU timers that have fired and move them
* off the tsk->*_timers list onto the firing list. Per-thread timers * off the tsk->*_timers list onto the firing list. Per-thread timers
...@@ -1128,19 +1074,6 @@ static void check_process_timers(struct task_struct *tsk, ...@@ -1128,19 +1074,6 @@ static void check_process_timers(struct task_struct *tsk,
struct task_cputime cputime; struct task_cputime cputime;
unsigned long soft; unsigned long soft;
/*
* Don't sample the current process CPU clocks if there are no timers.
*/
if (list_empty(&timers[CPUCLOCK_PROF]) &&
cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) &&
sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
list_empty(&timers[CPUCLOCK_VIRT]) &&
cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) &&
list_empty(&timers[CPUCLOCK_SCHED])) {
stop_process_timers(sig);
return;
}
/* /*
* Collect the current process totals. * Collect the current process totals.
*/ */
...@@ -1230,18 +1163,11 @@ static void check_process_timers(struct task_struct *tsk, ...@@ -1230,18 +1163,11 @@ static void check_process_timers(struct task_struct *tsk,
} }
} }
if (!cputime_eq(prof_expires, cputime_zero) && sig->cputime_expires.prof_exp = prof_expires;
(cputime_eq(sig->cputime_expires.prof_exp, cputime_zero) || sig->cputime_expires.virt_exp = virt_expires;
cputime_gt(sig->cputime_expires.prof_exp, prof_expires))) sig->cputime_expires.sched_exp = sched_expires;
sig->cputime_expires.prof_exp = prof_expires; if (task_cputime_zero(&sig->cputime_expires))
if (!cputime_eq(virt_expires, cputime_zero) && stop_process_timers(sig);
(cputime_eq(sig->cputime_expires.virt_exp, cputime_zero) ||
cputime_gt(sig->cputime_expires.virt_exp, virt_expires)))
sig->cputime_expires.virt_exp = virt_expires;
if (sched_expires != 0 &&
(sig->cputime_expires.sched_exp == 0 ||
sig->cputime_expires.sched_exp > sched_expires))
sig->cputime_expires.sched_exp = sched_expires;
} }
/* /*
...@@ -1270,6 +1196,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) ...@@ -1270,6 +1196,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
goto out; goto out;
} }
read_lock(&tasklist_lock); /* arm_timer needs it. */ read_lock(&tasklist_lock); /* arm_timer needs it. */
spin_lock(&p->sighand->siglock);
} else { } else {
read_lock(&tasklist_lock); read_lock(&tasklist_lock);
if (unlikely(p->signal == NULL)) { if (unlikely(p->signal == NULL)) {
...@@ -1290,6 +1217,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) ...@@ -1290,6 +1217,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
clear_dead_task(timer, now); clear_dead_task(timer, now);
goto out_unlock; goto out_unlock;
} }
spin_lock(&p->sighand->siglock);
cpu_timer_sample_group(timer->it_clock, p, &now); cpu_timer_sample_group(timer->it_clock, p, &now);
bump_cpu_timer(timer, now); bump_cpu_timer(timer, now);
/* Leave the tasklist_lock locked for the call below. */ /* Leave the tasklist_lock locked for the call below. */
...@@ -1298,7 +1226,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) ...@@ -1298,7 +1226,9 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
/* /*
* Now re-arm for the new expiry time. * Now re-arm for the new expiry time.
*/ */
arm_timer(timer, now); BUG_ON(!irqs_disabled());
arm_timer(timer);
spin_unlock(&p->sighand->siglock);
out_unlock: out_unlock:
read_unlock(&tasklist_lock); read_unlock(&tasklist_lock);
...@@ -1309,23 +1239,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) ...@@ -1309,23 +1239,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
++timer->it_requeue_pending; ++timer->it_requeue_pending;
} }
/**
 * task_cputime_zero - Test whether every field of a task_cputime is zero.
 *
 * @cputime:	The struct to inspect.
 *
 * Returns 1 when utime, stime and sum_exec_runtime are all zero, and 0
 * as soon as any one of them is nonzero.
 */
static inline int task_cputime_zero(const struct task_cputime *cputime)
{
	return cputime_eq(cputime->utime, cputime_zero) &&
	       cputime_eq(cputime->stime, cputime_zero) &&
	       cputime->sum_exec_runtime == 0;
}
/** /**
* task_cputime_expired - Compare two task_cputime entities. * task_cputime_expired - Compare two task_cputime entities.
* *
...@@ -1382,7 +1295,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk) ...@@ -1382,7 +1295,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
} }
sig = tsk->signal; sig = tsk->signal;
if (!task_cputime_zero(&sig->cputime_expires)) { if (sig->cputimer.running) {
struct task_cputime group_sample; struct task_cputime group_sample;
thread_group_cputimer(tsk, &group_sample); thread_group_cputimer(tsk, &group_sample);
...@@ -1390,7 +1303,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk) ...@@ -1390,7 +1303,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
return 1; return 1;
} }
return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY; return 0;
} }
/* /*
...@@ -1419,7 +1332,12 @@ void run_posix_cpu_timers(struct task_struct *tsk) ...@@ -1419,7 +1332,12 @@ void run_posix_cpu_timers(struct task_struct *tsk)
* put them on the firing list. * put them on the firing list.
*/ */
check_thread_timers(tsk, &firing); check_thread_timers(tsk, &firing);
check_process_timers(tsk, &firing); /*
* If there are any active process wide timers (POSIX 1.b, itimers,
* RLIMIT_CPU) cputimer must be running.
*/
if (tsk->signal->cputimer.running)
check_process_timers(tsk, &firing);
/* /*
* We must release these locks before taking any timer's lock. * We must release these locks before taking any timer's lock.
...@@ -1456,21 +1374,23 @@ void run_posix_cpu_timers(struct task_struct *tsk) ...@@ -1456,21 +1374,23 @@ void run_posix_cpu_timers(struct task_struct *tsk)
} }
/* /*
* Set one of the process-wide special case CPU timers. * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
* The tsk->sighand->siglock must be held by the caller. * The tsk->sighand->siglock must be held by the caller.
* The *newval argument is relative and we update it to be absolute, *oldval
* is absolute and we update it to be relative.
*/ */
void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
cputime_t *newval, cputime_t *oldval) cputime_t *newval, cputime_t *oldval)
{ {
union cpu_time_count now; union cpu_time_count now;
struct list_head *head;
BUG_ON(clock_idx == CPUCLOCK_SCHED); BUG_ON(clock_idx == CPUCLOCK_SCHED);
cpu_timer_sample_group(clock_idx, tsk, &now); cpu_timer_sample_group(clock_idx, tsk, &now);
if (oldval) { if (oldval) {
/*
* We are setting itimer. The *oldval is absolute and we update
* it to be relative, *newval argument is relative and we update
* it to be absolute.
*/
if (!cputime_eq(*oldval, cputime_zero)) { if (!cputime_eq(*oldval, cputime_zero)) {
if (cputime_le(*oldval, now.cpu)) { if (cputime_le(*oldval, now.cpu)) {
/* Just about to fire. */ /* Just about to fire. */
...@@ -1483,33 +1403,21 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, ...@@ -1483,33 +1403,21 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
if (cputime_eq(*newval, cputime_zero)) if (cputime_eq(*newval, cputime_zero))
return; return;
*newval = cputime_add(*newval, now.cpu); *newval = cputime_add(*newval, now.cpu);
/*
* If the RLIMIT_CPU timer will expire before the
* ITIMER_PROF timer, we have nothing else to do.
*/
if (tsk->signal->rlim[RLIMIT_CPU].rlim_cur
< cputime_to_secs(*newval))
return;
} }
/* /*
* Check whether there are any process timers already set to fire * Update expiration cache if we are the earliest timer, or eventually
* before this one. If so, we don't have anything more to do. * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.
*/ */
head = &tsk->signal->cpu_timers[clock_idx]; switch (clock_idx) {
if (list_empty(head) || case CPUCLOCK_PROF:
cputime_ge(list_first_entry(head, if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
struct cpu_timer_list, entry)->expires.cpu,
*newval)) {
switch (clock_idx) {
case CPUCLOCK_PROF:
tsk->signal->cputime_expires.prof_exp = *newval; tsk->signal->cputime_expires.prof_exp = *newval;
break; break;
case CPUCLOCK_VIRT: case CPUCLOCK_VIRT:
if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
tsk->signal->cputime_expires.virt_exp = *newval; tsk->signal->cputime_expires.virt_exp = *newval;
break; break;
}
} }
} }
......
...@@ -132,12 +132,11 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv, ...@@ -132,12 +132,11 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
*/ */
static inline void warp_clock(void) static inline void warp_clock(void)
{ {
write_seqlock_irq(&xtime_lock); struct timespec delta, adjust;
wall_to_monotonic.tv_sec -= sys_tz.tz_minuteswest * 60; delta.tv_sec = sys_tz.tz_minuteswest * 60;
xtime.tv_sec += sys_tz.tz_minuteswest * 60; delta.tv_nsec = 0;
update_xtime_cache(0); adjust = timespec_add_safe(current_kernel_time(), delta);
write_sequnlock_irq(&xtime_lock); do_settimeofday(&adjust);
clock_was_set();
} }
/* /*
......
...@@ -625,6 +625,54 @@ static void clocksource_enqueue(struct clocksource *cs) ...@@ -625,6 +625,54 @@ static void clocksource_enqueue(struct clocksource *cs)
list_add(&cs->list, entry); list_add(&cs->list, entry);
} }
/*
* Maximum time we expect to go between ticks. This includes idle
* tickless time. It provides the trade off between selecting a
* mult/shift pair that is very precise but can only handle a short
* period of time, vs. a mult/shift pair that can handle long periods
* of time but isn't as precise.
*
* This is a subsystem constant, and actual hardware limitations
* may override it (e.g. clocksources that wrap every 3 seconds).
*/
#define MAX_UPDATE_LENGTH 5 /* Seconds */
/**
* __clocksource_register_scale - Used to install new clocksources
* @cs: clocksource to be registered
* @scale: Scale factor multiplied against freq to get clocksource hz
* @freq: clocksource frequency (cycles per second) divided by scale
*
* Fills in cs->mult, cs->shift and cs->max_idle_ns from @freq and @scale,
* then enqueues the clocksource, re-runs clocksource selection and hands
* the clocksource to the watchdog, all under clocksource_mutex.
*
* @scale is used as a divisor below and therefore must be nonzero.
*
* Always returns zero: the body has no failure path.
*
* This *SHOULD NOT* be called directly! Please use the
* clocksource_register_hz() or clocksource_register_khz() helper functions.
*/
int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{
/*
* Ideally we want to use some of the limits used in
* clocksource_max_deferment, to provide a more informed
* MAX_UPDATE_LENGTH. But for now this just gets the
* register interface working properly.
*/
clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
NSEC_PER_SEC/scale,
MAX_UPDATE_LENGTH*scale);
/* Computed after mult/shift have been filled in above. */
cs->max_idle_ns = clocksource_max_deferment(cs);
/* The enqueue, selection and watchdog steps run with the mutex held. */
mutex_lock(&clocksource_mutex);
clocksource_enqueue(cs);
clocksource_select();
clocksource_enqueue_watchdog(cs);
mutex_unlock(&clocksource_mutex);
return 0;
}
EXPORT_SYMBOL_GPL(__clocksource_register_scale);
/** /**
* clocksource_register - Used to install new clocksources * clocksource_register - Used to install new clocksources
* @t: clocksource to be registered * @t: clocksource to be registered
......
...@@ -69,7 +69,7 @@ static s64 time_freq; ...@@ -69,7 +69,7 @@ static s64 time_freq;
/* time at last adjustment (secs): */ /* time at last adjustment (secs): */
static long time_reftime; static long time_reftime;
long time_adjust; static long time_adjust;
/* constant (boot-param configurable) NTP tick adjustment (upscaled) */ /* constant (boot-param configurable) NTP tick adjustment (upscaled) */
static s64 ntp_tick_adj; static s64 ntp_tick_adj;
......
...@@ -165,13 +165,6 @@ struct timespec raw_time; ...@@ -165,13 +165,6 @@ struct timespec raw_time;
/* flag for if timekeeping is suspended */ /* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended; int __read_mostly timekeeping_suspended;
static struct timespec xtime_cache __attribute__ ((aligned (16)));
void update_xtime_cache(u64 nsec)
{
xtime_cache = xtime;
timespec_add_ns(&xtime_cache, nsec);
}
/* must hold xtime_lock */ /* must hold xtime_lock */
void timekeeping_leap_insert(int leapsecond) void timekeeping_leap_insert(int leapsecond)
{ {
...@@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv) ...@@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv)
xtime = *tv; xtime = *tv;
update_xtime_cache(0);
timekeeper.ntp_error = 0; timekeeper.ntp_error = 0;
ntp_clear(); ntp_clear();
...@@ -559,7 +550,6 @@ void __init timekeeping_init(void) ...@@ -559,7 +550,6 @@ void __init timekeeping_init(void)
} }
set_normalized_timespec(&wall_to_monotonic, set_normalized_timespec(&wall_to_monotonic,
-boot.tv_sec, -boot.tv_nsec); -boot.tv_sec, -boot.tv_nsec);
update_xtime_cache(0);
total_sleep_time.tv_sec = 0; total_sleep_time.tv_sec = 0;
total_sleep_time.tv_nsec = 0; total_sleep_time.tv_nsec = 0;
write_sequnlock_irqrestore(&xtime_lock, flags); write_sequnlock_irqrestore(&xtime_lock, flags);
...@@ -593,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev) ...@@ -593,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev)
wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
total_sleep_time = timespec_add_safe(total_sleep_time, ts); total_sleep_time = timespec_add_safe(total_sleep_time, ts);
} }
update_xtime_cache(0);
/* re-base the last cycle value */ /* re-base the last cycle value */
timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
timekeeper.ntp_error = 0; timekeeper.ntp_error = 0;
...@@ -788,7 +777,6 @@ void update_wall_time(void) ...@@ -788,7 +777,6 @@ void update_wall_time(void)
{ {
struct clocksource *clock; struct clocksource *clock;
cycle_t offset; cycle_t offset;
u64 nsecs;
int shift = 0, maxshift; int shift = 0, maxshift;
/* Make sure we're fully resumed: */ /* Make sure we're fully resumed: */
...@@ -847,7 +835,9 @@ void update_wall_time(void) ...@@ -847,7 +835,9 @@ void update_wall_time(void)
timekeeper.ntp_error += neg << timekeeper.ntp_error_shift; timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
} }
/* store full nanoseconds into xtime after rounding it up and
/*
* Store full nanoseconds into xtime after rounding it up and
* add the remainder to the error difference. * add the remainder to the error difference.
*/ */
xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1; xtime.tv_nsec = ((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1;
...@@ -855,8 +845,15 @@ void update_wall_time(void) ...@@ -855,8 +845,15 @@ void update_wall_time(void)
timekeeper.ntp_error += timekeeper.xtime_nsec << timekeeper.ntp_error += timekeeper.xtime_nsec <<
timekeeper.ntp_error_shift; timekeeper.ntp_error_shift;
nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); /*
update_xtime_cache(nsecs); * Finally, make sure that after the rounding
* xtime.tv_nsec isn't larger than NSEC_PER_SEC
*/
if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
xtime.tv_nsec -= NSEC_PER_SEC;
xtime.tv_sec++;
second_overflow();
}
/* check to see if there is a new clocksource to use */ /* check to see if there is a new clocksource to use */
update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult); update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
...@@ -896,13 +893,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased); ...@@ -896,13 +893,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
unsigned long get_seconds(void) unsigned long get_seconds(void)
{ {
return xtime_cache.tv_sec; return xtime.tv_sec;
} }
EXPORT_SYMBOL(get_seconds); EXPORT_SYMBOL(get_seconds);
struct timespec __current_kernel_time(void) struct timespec __current_kernel_time(void)
{ {
return xtime_cache; return xtime;
} }
struct timespec current_kernel_time(void) struct timespec current_kernel_time(void)
...@@ -913,7 +910,7 @@ struct timespec current_kernel_time(void) ...@@ -913,7 +910,7 @@ struct timespec current_kernel_time(void)
do { do {
seq = read_seqbegin(&xtime_lock); seq = read_seqbegin(&xtime_lock);
now = xtime_cache; now = xtime;
} while (read_seqretry(&xtime_lock, seq)); } while (read_seqretry(&xtime_lock, seq));
return now; return now;
...@@ -928,7 +925,7 @@ struct timespec get_monotonic_coarse(void) ...@@ -928,7 +925,7 @@ struct timespec get_monotonic_coarse(void)
do { do {
seq = read_seqbegin(&xtime_lock); seq = read_seqbegin(&xtime_lock);
now = xtime_cache; now = xtime;
mono = wall_to_monotonic; mono = wall_to_monotonic;
} while (read_seqretry(&xtime_lock, seq)); } while (read_seqretry(&xtime_lock, seq));
......
...@@ -319,6 +319,24 @@ unsigned long round_jiffies_up_relative(unsigned long j) ...@@ -319,6 +319,24 @@ unsigned long round_jiffies_up_relative(unsigned long j)
} }
EXPORT_SYMBOL_GPL(round_jiffies_up_relative); EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
/**
* set_timer_slack - set the allowed slack for a timer
* @timer: the timer whose slack field is updated
* @slack_hz: the amount of time (in jiffies) allowed for rounding
*
* Set the amount of time, in jiffies, that a certain timer has
* in terms of slack. By setting this value, the timer subsystem
* will schedule the actual timer somewhere between
* the time mod_timer() asks for, and that time plus the slack.
*
* By setting the slack to -1, a percentage of the delay is used
* instead.
*
* This only stores @slack_hz in @timer->slack; it does not re-arm the timer.
*/
void set_timer_slack(struct timer_list *timer, int slack_hz)
{
timer->slack = slack_hz;
}
EXPORT_SYMBOL_GPL(set_timer_slack);
static inline void set_running_timer(struct tvec_base *base, static inline void set_running_timer(struct tvec_base *base,
struct timer_list *timer) struct timer_list *timer)
...@@ -550,6 +568,7 @@ static void __init_timer(struct timer_list *timer, ...@@ -550,6 +568,7 @@ static void __init_timer(struct timer_list *timer,
{ {
timer->entry.next = NULL; timer->entry.next = NULL;
timer->base = __raw_get_cpu_var(tvec_bases); timer->base = __raw_get_cpu_var(tvec_bases);
timer->slack = -1;
#ifdef CONFIG_TIMER_STATS #ifdef CONFIG_TIMER_STATS
timer->start_site = NULL; timer->start_site = NULL;
timer->start_pid = -1; timer->start_pid = -1;
...@@ -715,6 +734,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires) ...@@ -715,6 +734,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires)
} }
EXPORT_SYMBOL(mod_timer_pending); EXPORT_SYMBOL(mod_timer_pending);
/*
 * Decide where to put the timer while taking the slack into account
 *
 * Algorithm:
 *   1) calculate the maximum (absolute) time
 *   2) calculate the highest bit where the expires and new max are different
 *   3) use this bit to make a mask
 *   4) use the bitmask to round down the maximum time, so that all last
 *      bits are zeros
 *
 * @timer:   timer whose ->slack selects the policy: a value >= 0 is an
 *           explicit slack in jiffies, a negative value requests automatic
 *           slack of 1/256 (about 0.4%) of the remaining delay.
 * @expires: requested absolute expiry time in jiffies.
 *
 * Returns the (possibly later) expiry time to program. @expires is returned
 * unchanged when no slack applies, which includes timers that are already
 * expired or less than 256 jiffies away in automatic mode.
 */
static inline
unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
{
	unsigned long expires_limit, mask;
	int bit;

	if (timer->slack >= 0) {
		expires_limit = expires + timer->slack;
	} else {
		/*
		 * Use a signed delta: an already expired timer gives a
		 * negative value here instead of wrapping to a huge unsigned
		 * one that would push the timer far into the future.
		 */
		long delta = expires - jiffies;

		/* Below 256 jiffies the 1/256 slack rounds down to zero. */
		if (delta < 256)
			return expires;

		expires_limit = expires + delta / 256;
	}

	mask = expires ^ expires_limit;
	if (mask == 0)
		return expires;

	bit = find_last_bit(&mask, BITS_PER_LONG);

	/* 1UL: the shift must be done in the width of unsigned long. */
	mask = (1UL << bit) - 1;

	expires_limit = expires_limit & ~(mask);

	return expires_limit;
}
/** /**
* mod_timer - modify a timer's timeout * mod_timer - modify a timer's timeout
* @timer: the timer to be modified * @timer: the timer to be modified
...@@ -745,6 +799,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires) ...@@ -745,6 +799,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
if (timer_pending(timer) && timer->expires == expires) if (timer_pending(timer) && timer->expires == expires)
return 1; return 1;
expires = apply_slack(timer, expires);
return __mod_timer(timer, expires, false, TIMER_NOT_PINNED); return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
} }
EXPORT_SYMBOL(mod_timer); EXPORT_SYMBOL(mod_timer);
...@@ -955,6 +1011,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index) ...@@ -955,6 +1011,47 @@ static int cascade(struct tvec_base *base, struct tvec *tv, int index)
return index; return index;
} }
/*
* Invoke a timer callback and check that it leaves the preemption count
* as it found it.
*
* @timer: the timer being run; used for the lockdep map copy and tracepoints
* @fn:    the callback to invoke
* @data:  argument passed to @fn
*
* The preemption count is sampled before the call. If it differs afterwards,
* a one-time warning naming @fn is emitted and the sampled value is written
* back.
*/
static void call_timer_fn(struct timer_list *timer, void (*fn)(unsigned long),
unsigned long data)
{
/* Sampled before fn() runs so a leak can be detected and undone below. */
int preempt_count = preempt_count();
#ifdef CONFIG_LOCKDEP
/*
* It is permissible to free the timer from inside the
* function that is called from it, so we need to take this into
* account for lockdep too. To avoid bogus "held lock freed"
* warnings as well as problems when looking into
* timer->lockdep_map, make a copy and use that here.
*/
struct lockdep_map lockdep_map = timer->lockdep_map;
#endif
/*
* Couple the lock chain with the lock chain at
* del_timer_sync() by acquiring the lock_map around the fn()
* call here and in del_timer_sync().
*/
lock_map_acquire(&lockdep_map);
trace_timer_expire_entry(timer);
fn(data);
trace_timer_expire_exit(timer);
lock_map_release(&lockdep_map);
if (preempt_count != preempt_count()) {
WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
fn, preempt_count, preempt_count());
/*
* Restore the preempt count. That gives us a decent
* chance to survive and extract information. If the
* callback kept a lock held, bad luck, but not worse
* than the BUG() we had.
*/
preempt_count() = preempt_count;
}
}
#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK) #define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
/** /**
...@@ -998,45 +1095,7 @@ static inline void __run_timers(struct tvec_base *base) ...@@ -998,45 +1095,7 @@ static inline void __run_timers(struct tvec_base *base)
detach_timer(timer, 1); detach_timer(timer, 1);
spin_unlock_irq(&base->lock); spin_unlock_irq(&base->lock);
{ call_timer_fn(timer, fn, data);
int preempt_count = preempt_count();
#ifdef CONFIG_LOCKDEP
/*
* It is permissible to free the timer from
* inside the function that is called from
* it, this we need to take into account for
* lockdep too. To avoid bogus "held lock
* freed" warnings as well as problems when
* looking into timer->lockdep_map, make a
* copy and use that here.
*/
struct lockdep_map lockdep_map =
timer->lockdep_map;
#endif
/*
* Couple the lock chain with the lock chain at
* del_timer_sync() by acquiring the lock_map
* around the fn() call here and in
* del_timer_sync().
*/
lock_map_acquire(&lockdep_map);
trace_timer_expire_entry(timer);
fn(data);
trace_timer_expire_exit(timer);
lock_map_release(&lockdep_map);
if (preempt_count != preempt_count()) {
printk(KERN_ERR "huh, entered %p "
"with preempt_count %08x, exited"
" with %08x?\n",
fn, preempt_count,
preempt_count());
BUG();
}
}
spin_lock_irq(&base->lock); spin_lock_irq(&base->lock);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment