Commit afc14106 authored by Ingo Molnar's avatar Ingo Molnar Committed by Linus Torvalds

[PATCH] timer-2.5.40-F7

This does a number of timer subsystem enhancements:

- simplified timer initialization, now it's the cheapest possible thing:

    static inline void init_timer(struct timer_list * timer)
    {
            timer->base = NULL;
    }

  since the timer functions already did a !timer->base check this did not
  have any effect on their fastpath.

- the rule from now on is that timer->base is set upon activation of the
  timer, and cleared upon deactivation. This also made it possible to:

- reorganize all the timer handling code to not assume anything about
  timer->entry.next and timer->entry.prev - this also removed lots of
  unnecessery cleaning of these fields. Removed lots of unnecessary list
  operations from the fastpath.

- simplified del_timer_sync(): it now uses del_timer() plus some simple
  synchronization code. Note that this also fixes a bug: if mod_timer (or
  add_timer) moves a currently executing timer to another CPU's timer
  vector, then del_timer_sync() does not synchronize with the handler
  properly.

- bugfix: moved run_local_timers() from scheduler_tick() into
  update_process_times() .. scheduler_tick() might be called from the fork
  code which will not quite have the intended effect ...

- removed the APIC-timer-IRQ shifting done on SMP, Dipankar Sarma's
  testing shows no negative effects.

- cleaned up include/linux/timer.h:

     - removed the timer_t typedef, and fixes up kernel/workqueue.c to use
       the 'struct timer_list' name instead.

     - removed unnecessery includes

     - renamed the 'list' field to 'entry' (it's an entry not a list head)

     - exchanged the 'function' and 'data' fields. This, besides being
       more logical, also unearthed the last few remaining places that
       initialized timers by assuming some given field ordering, the patch
       also fixes these places. (fs/xfs/pagebuf/page_buf.c,
       net/core/profile.c and net/ipv4/inetpeer.c)

     - removed the defunct sync_timers(), timer_enter() and timer_exit()
       prototypes.

     - added docbook-style comments.

- other kernel/timer.c changes:

     - base->running_timer does not have to be volatile ...

     - added consistent comments to all the important functions.

     - made the sync-waiting in del_timer_sync preempt- and lowpower-
       friendly.

i've compiled, booted & tested the patched kernel on x86 UP and SMP. I
have tried moderately high networking load as well, to make sure the timer
changes are correct - they appear to be.
parent 794aa320
......@@ -813,24 +813,9 @@ void __setup_APIC_LVTT(unsigned int clocks)
static void setup_APIC_timer(unsigned int clocks)
{
unsigned int slice, t0, t1;
unsigned long flags;
int delta;
local_save_flags(flags);
local_irq_enable();
/*
* ok, Intel has some smart code in their APIC that knows
* if a CPU was in 'hlt' lowpower mode, and this increases
* its APIC arbitration priority. To avoid the external timer
* IRQ APIC event being in synchron with the APIC clock we
* introduce an interrupt skew to spread out timer events.
*
* The number of slices within a 'big' timeslice is NR_CPUS+1
*/
slice = clocks / (NR_CPUS+1);
printk("cpu: %d, clocks: %d, slice: %d\n", smp_processor_id(), clocks, slice);
local_irq_save(flags);
/*
* Wait for IRQ0's slice:
......@@ -839,22 +824,6 @@ static void setup_APIC_timer(unsigned int clocks)
__setup_APIC_LVTT(clocks);
t0 = apic_read(APIC_TMICT)*APIC_DIVISOR;
/* Wait till TMCCT gets reloaded from TMICT... */
do {
t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR;
delta = (int)(t0 - t1 - slice*(smp_processor_id()+1));
} while (delta >= 0);
/* Now wait for our slice for real. */
do {
t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR;
delta = (int)(t0 - t1 - slice*(smp_processor_id()+1));
} while (delta < 0);
__setup_APIC_LVTT(clocks);
printk("CPU%d<T0:%d,T1:%d,D:%d,S:%d,C:%d>\n", smp_processor_id(), t0, t1, delta, slice, clocks);
local_irq_restore(flags);
}
......
......@@ -1680,7 +1680,7 @@ pagebuf_daemon(
page_buf_t *pb;
struct list_head *curr, *next, tmp;
struct timer_list pb_daemon_timer =
{ {NULL, NULL}, 0, 0, (timeout_fn)pagebuf_daemon_wakeup };
{ .function = (timeout_fn)pagebuf_daemon_wakeup };
/* Set up the thread */
daemonize();
......
......@@ -2,70 +2,59 @@
#define _LINUX_TIMER_H
#include <linux/config.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/cache.h>
struct tvec_t_base_s;
/*
* Timers may be dynamically created and destroyed, and should be initialized
* by a call to init_timer() upon creation.
*
* The "data" field enables use of a common timeout function for several
* timeouts. You can use this field to distinguish between the different
* invocations.
*/
typedef struct timer_list {
struct list_head list;
struct timer_list {
struct list_head entry;
unsigned long expires;
unsigned long data;
void (*function)(unsigned long);
unsigned long data;
struct tvec_t_base_s *base;
} timer_t;
};
extern void add_timer(timer_t * timer);
extern int del_timer(timer_t * timer);
#ifdef CONFIG_SMP
extern int del_timer_sync(timer_t * timer);
extern void sync_timers(void);
#define timer_enter(base, t) do { base->running_timer = t; mb(); } while (0)
#define timer_exit(base) do { base->running_timer = NULL; } while (0)
#define timer_is_running(base,t) (base->running_timer == t)
#define timer_synchronize(base,t) while (timer_is_running(base,t)) barrier()
#else
#define del_timer_sync(t) del_timer(t)
#define sync_timers() do { } while (0)
#define timer_enter(base,t) do { } while (0)
#define timer_exit(base) do { } while (0)
#endif
/*
* mod_timer is a more efficient way to update the expire field of an
* active timer (if the timer is inactive it will be activated)
* mod_timer(a,b) is equivalent to del_timer(a); a->expires = b; add_timer(a).
* If the timer is known to be not pending (ie, in the handler), mod_timer
* is less efficient than a->expires = b; add_timer(a).
/***
* init_timer - initialize a timer.
* @timer: the timer to be initialized
*
* init_timer() must be done to a timer prior calling *any* of the
* other timer functions.
*/
int mod_timer(timer_t *timer, unsigned long expires);
extern void it_real_fn(unsigned long);
extern void init_timers(void);
extern void run_local_timers(void);
static inline void init_timer(timer_t * timer)
static inline void init_timer(struct timer_list * timer)
{
timer->list.next = timer->list.prev = NULL;
timer->base = NULL;
}
static inline int timer_pending(const timer_t * timer)
/***
* timer_pending - is a timer pending?
* @timer: the timer in question
*
* timer_pending will tell whether a given timer is currently pending,
* or not. Callers must ensure serialization wrt. other operations done
* to this timer, eg. interrupt contexts, or other CPUs on SMP.
*
* return value: 1 if the timer is pending, 0 if not.
*/
static inline int timer_pending(const struct timer_list * timer)
{
return timer->list.next != NULL;
return timer->base != NULL;
}
extern void add_timer(struct timer_list * timer);
extern int del_timer(struct timer_list * timer);
extern int mod_timer(struct timer_list *timer, unsigned long expires);
#if CONFIG_SMP
extern int del_timer_sync(struct timer_list * timer);
#else
# define del_timer_sync(t) del_timer(t)
#endif
extern void init_timers(void);
extern void run_local_timers(void);
extern void it_real_fn(unsigned long);
#endif
......@@ -15,7 +15,7 @@ struct work_struct {
void (*func)(void *);
void *data;
void *wq_data;
timer_t timer;
struct timer_list timer;
};
#define __WORK_INITIALIZER(n, f, d) { \
......
......@@ -855,6 +855,9 @@ static inline void idle_tick(runqueue_t *rq)
/*
* This function gets called by the timer code, with HZ frequency.
* We call it with interrupts disabled.
*
* It also gets called by the fork code, when changing the parent's
* timeslices.
*/
void scheduler_tick(int user_ticks, int sys_ticks)
{
......@@ -862,7 +865,6 @@ void scheduler_tick(int user_ticks, int sys_ticks)
runqueue_t *rq = this_rq();
task_t *p = current;
run_local_timers();
if (p == rq->idle) {
/* note: this timer irq context must be accounted for as well */
if (irq_count() - HARDIRQ_OFFSET >= SOFTIRQ_OFFSET)
......@@ -2109,8 +2111,6 @@ __init int migration_init(void)
spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
#endif
extern void init_timers(void);
void __init sched_init(void)
{
runqueue_t *rq;
......
......@@ -47,10 +47,12 @@ typedef struct tvec_root_s {
struct list_head vec[TVR_SIZE];
} tvec_root_t;
typedef struct timer_list timer_t;
struct tvec_t_base_s {
spinlock_t lock;
unsigned long timer_jiffies;
volatile timer_t * volatile running_timer;
timer_t *running_timer;
tvec_root_t tv1;
tvec_t tv2;
tvec_t tv3;
......@@ -69,7 +71,7 @@ static inline void internal_add_timer(tvec_base_t *base, timer_t *timer)
{
unsigned long expires = timer->expires;
unsigned long idx = expires - base->timer_jiffies;
struct list_head * vec;
struct list_head *vec;
if (idx < TVR_SIZE) {
int i = expires & TVR_MASK;
......@@ -92,24 +94,36 @@ static inline void internal_add_timer(tvec_base_t *base, timer_t *timer)
} else if (idx <= 0xffffffffUL) {
int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
vec = base->tv5.vec + i;
} else {
} else
/* Can only get here on architectures with 64-bit jiffies */
INIT_LIST_HEAD(&timer->list);
return;
}
/*
* Timers are FIFO:
*/
list_add_tail(&timer->list, vec);
list_add_tail(&timer->entry, vec);
}
/***
* add_timer - start a timer
* @timer: the timer to be added
*
* The kernel will do a ->function(->data) callback from the
* timer interrupt at the ->expired point in the future. The
* current time is 'jiffies'.
*
* The timer's ->expired, ->function (and if the handler uses it, ->data)
* fields must be set prior calling this function.
*
* Timers with an ->expired field in the past will be executed in the next
* timer tick. It's illegal to add an already pending timer.
*/
void add_timer(timer_t *timer)
{
int cpu = get_cpu();
tvec_base_t *base = tvec_bases + cpu;
unsigned long flags;
BUG_ON(timer_pending(timer));
BUG_ON(timer_pending(timer) || !timer->function);
spin_lock_irqsave(&base->lock, flags);
internal_add_timer(base, timer);
......@@ -118,25 +132,38 @@ void add_timer(timer_t *timer)
put_cpu();
}
static inline int detach_timer (timer_t *timer)
{
if (!timer_pending(timer))
return 0;
list_del(&timer->list);
return 1;
}
/*
* mod_timer() has subtle locking semantics because parallel
* calls to it must happen serialized.
/***
* mod_timer - modify a timer's timeout
* @timer: the timer to be modified
*
* mod_timer is a more efficient way to update the expire field of an
* active timer (if the timer is inactive it will be activated)
*
* mod_timer(timer, expires) is equivalent to:
*
* del_timer(timer); timer->expires = expires; add_timer(timer);
*
* Note that if there are multiple unserialized concurrent users of the
* same timer, then mod_timer() is the only safe way to modify the timeout,
* since add_timer() cannot modify an already running timer.
*
* The function returns whether it has modified a pending timer or not.
* (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
* active timer returns 1.)
*/
int mod_timer(timer_t *timer, unsigned long expires)
{
tvec_base_t *old_base, *new_base;
unsigned long flags;
int ret;
int ret = 0;
if (timer_pending(timer) && timer->expires == expires)
BUG_ON(!timer->function);
/*
* This is a common optimization triggered by the
* networking code - if the timer is re-modified
* to be the same thing then just return:
*/
if (timer->expires == expires && timer_pending(timer))
return 1;
local_irq_save(flags);
......@@ -156,8 +183,8 @@ int mod_timer(timer_t *timer, unsigned long expires)
spin_lock(&new_base->lock);
}
/*
* Subtle, we rely on timer->base being always
* valid and being updated atomically.
* The timer base might have changed while we were
* trying to take the lock(s):
*/
if (timer->base != old_base) {
spin_unlock(&new_base->lock);
......@@ -167,8 +194,15 @@ int mod_timer(timer_t *timer, unsigned long expires)
} else
spin_lock(&new_base->lock);
/*
* Delete the previous timeout (if there was any), and install
* the new one:
*/
if (old_base) {
list_del(&timer->entry);
ret = 1;
}
timer->expires = expires;
ret = detach_timer(timer);
internal_add_timer(new_base, timer);
timer->base = new_base;
......@@ -179,66 +213,74 @@ int mod_timer(timer_t *timer, unsigned long expires)
return ret;
}
int del_timer(timer_t * timer)
/***
* del_timer - deactive a timer.
* @timer: the timer to be deactivated
*
* del_timer() deactivates a timer - this works on both active and inactive
* timers.
*
* The function returns whether it has deactivated a pending timer or not.
* (ie. del_timer() of an inactive timer returns 0, del_timer() of an
* active timer returns 1.)
*/
int del_timer(timer_t *timer)
{
unsigned long flags;
tvec_base_t * base;
int ret;
tvec_base_t *base;
if (!timer->base)
return 0;
repeat:
base = timer->base;
if (!base)
return 0;
spin_lock_irqsave(&base->lock, flags);
if (base != timer->base) {
spin_unlock_irqrestore(&base->lock, flags);
goto repeat;
}
ret = detach_timer(timer);
timer->list.next = timer->list.prev = NULL;
list_del(&timer->entry);
timer->base = NULL;
spin_unlock_irqrestore(&base->lock, flags);
return ret;
return 1;
}
#ifdef CONFIG_SMP
/*
* SMP specific function to delete periodic timer.
* Caller must disable by some means restarting the timer
* for new. Upon exit the timer is not queued and handler is not running
* on any CPU. It returns number of times, which timer was deleted
* (for reference counting).
/***
* del_timer_sync - deactivate a timer and wait for the handler to finish.
* @timer: the timer to be deactivated
*
* This function only differs from del_timer() on SMP: besides deactivating
* the timer it also makes sure the handler has finished executing on other
* CPUs.
*
* Synchronization rules: callers must prevent restarting of the timer,
* otherwise this function is meaningless. It must not be called from
* interrupt contexts. Upon exit the timer is not queued and the handler
* is not running on any CPU.
*
* The function returns whether it has deactivated a pending timer or not.
*/
int del_timer_sync(timer_t * timer)
int del_timer_sync(timer_t *timer)
{
tvec_base_t * base;
int ret = 0;
tvec_base_t *base = tvec_bases;
int i, ret;
if (!timer->base)
return 0;
for (;;) {
unsigned long flags;
int running;
repeat:
base = timer->base;
spin_lock_irqsave(&base->lock, flags);
if (base != timer->base) {
spin_unlock_irqrestore(&base->lock, flags);
goto repeat;
}
ret += detach_timer(timer);
timer->list.next = timer->list.prev = 0;
running = timer_is_running(base, timer);
spin_unlock_irqrestore(&base->lock, flags);
ret = del_timer(timer);
if (!running)
for (i = 0; i < NR_CPUS; i++) {
if (!cpu_online(i))
continue;
if (base->running_timer == timer) {
while (base->running_timer == timer) {
cpu_relax();
preempt_disable();
preempt_enable();
}
break;
timer_synchronize(base, timer);
}
base++;
}
return ret;
}
#endif
......@@ -258,11 +300,10 @@ static void cascade(tvec_base_t *base, tvec_t *tv)
while (curr != head) {
timer_t *tmp;
tmp = list_entry(curr, timer_t, list);
tmp = list_entry(curr, timer_t, entry);
if (tmp->base != base)
BUG();
next = curr->next;
list_del(curr); // not needed
internal_add_timer(base, tmp);
curr = next;
}
......@@ -270,7 +311,14 @@ static void cascade(tvec_base_t *base, tvec_t *tv)
tv->index = (tv->index + 1) & TVN_MASK;
}
static void __run_timers(tvec_base_t *base)
/***
* __run_timers - run all expired timers (if any) on this CPU.
* @base: the timer vector to be processed.
*
* This function cascades all vectors and executes all expired timer
* vectors.
*/
static inline void __run_timers(tvec_base_t *base)
{
unsigned long flags;
......@@ -300,22 +348,26 @@ static void __run_timers(tvec_base_t *base)
unsigned long data;
timer_t *timer;
timer = list_entry(curr, timer_t, list);
timer = list_entry(curr, timer_t, entry);
fn = timer->function;
data = timer->data;
detach_timer(timer);
timer->list.next = timer->list.prev = NULL;
timer_enter(base, timer);
list_del(&timer->entry);
timer->base = NULL;
#if CONFIG_SMP
base->running_timer = timer;
#endif
spin_unlock_irq(&base->lock);
fn(data);
spin_lock_irq(&base->lock);
timer_exit(base);
goto repeat;
}
++base->timer_jiffies;
base->tv1.index = (base->tv1.index + 1) & TVR_MASK;
}
#if CONFIG_SMP
base->running_timer = NULL;
#endif
spin_unlock_irqrestore(&base->lock, flags);
}
......@@ -607,6 +659,7 @@ void update_process_times(int user_tick)
int cpu = smp_processor_id(), system = user_tick ^ 1;
update_one_process(p, user_tick, system, cpu);
run_local_timers();
scheduler_tick(user_tick, system);
}
......
......@@ -100,7 +100,7 @@ static void delayed_work_timer_fn(unsigned long __data)
int queue_delayed_work(struct workqueue_struct *wq, struct work_struct *work, unsigned long delay)
{
int ret = 0, cpu = get_cpu();
timer_t *timer = &work->timer;
struct timer_list *timer = &work->timer;
struct cpu_workqueue_struct *cwq = wq->cpu_wq + cpu;
if (!test_and_set_bit(0, &work->pending)) {
......
......@@ -35,7 +35,7 @@ long alpha_hi;
static void alpha_tick(unsigned long);
static struct timer_list alpha_timer =
{ NULL, NULL, 0, 0L, alpha_tick };
{ .function = alpha_tick };
void alpha_tick(unsigned long dummy)
{
......@@ -158,7 +158,7 @@ static void whitehole_inject(unsigned long);
int whitehole_init(struct net_device *dev);
static struct timer_list whitehole_timer =
{ NULL, NULL, 0, 0L, whitehole_inject };
{ .function = whitehole_inject };
static struct net_device whitehole_dev = {
"whitehole", 0x0, 0x0, 0x0, 0x0, 0, 0, 0, 0, 0, NULL, whitehole_init, };
......
......@@ -98,7 +98,7 @@ spinlock_t inet_peer_unused_lock = SPIN_LOCK_UNLOCKED;
static void peer_check_expire(unsigned long dummy);
static struct timer_list peer_periodic_timer =
{ { NULL, NULL }, 0, 0, &peer_check_expire };
{ .function = &peer_check_expire };
/* Exported for sysctl_net_ipv4. */
int inet_peer_gc_mintime = 10 * HZ,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment