Commit 3843e047 authored by Rolf Fokkens's avatar Rolf Fokkens Committed by Linus Torvalds

[PATCH] USER_HZ & NTP problems

I've been playing with different HZ values in the 2.4 kernel for a while
now, and apparently Linus has also decided to introduce a USER_HZ
constant (I used CLOCKS_PER_SEC) while raising the HZ value on x86 to
1000.

On x86, timekeeping has proven to be relatively fragile when raising HZ (OK,
I tried HZ=2048, which is quite high) because of the way the interrupt
timer is configured to fire HZ times each second.  This is done by
configuring a divisor in the timer chip (LATCH) which divides a certain
clock (1193180) and makes the chip fire interrupts at the resulting
frequency.

Now comes the catch: NTP requires a clock accuracy of 500 ppm.  For some
HZ values the clock is not accurate enough to meet this requirement,
hence NTP won't work well.

An example HZ value is 1020, which exceeds the 500 ppm requirement.  In
this case the best approximation is 1019.8 Hz.  The xtime.tv_usec value
is increased by 980 each tick, which means that after one
second the tv_usec value has increased by 999404 (should be 1000000),
which is an accuracy of 596 ppm.

Some more examples:
	  HZ Accuracy (ppm)
	---- --------------
	 100             17
	1000            151
	1024            632
	2000            687
	2008            343
	2011             18
	2048           1249

What I've been doing is replace tv_usec by tv_nsec, meaning xtime is now
a timespec instead of a timeval.  This allows the accuracy to be
improved by a factor of 1000 for any (well ...  any?) HZ value. 

Of course all kinds of calculations had to be improved as well.  The
ACTHZ constant is introduced to approximate the actual HZ value; it's
used to do some approximations of other related values.
parent ba815d85
...@@ -115,7 +115,7 @@ static inline unsigned long do_fast_gettimeoffset(void) ...@@ -115,7 +115,7 @@ static inline unsigned long do_fast_gettimeoffset(void)
return delay_at_last_interrupt + edx; return delay_at_last_interrupt + edx;
} }
#define TICK_SIZE tick #define TICK_SIZE (tick_nsec / 1000)
spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED; spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED;
EXPORT_SYMBOL(i8253_lock); EXPORT_SYMBOL(i8253_lock);
...@@ -280,7 +280,7 @@ void do_gettimeofday(struct timeval *tv) ...@@ -280,7 +280,7 @@ void do_gettimeofday(struct timeval *tv)
usec += lost * (1000000 / HZ); usec += lost * (1000000 / HZ);
} }
sec = xtime.tv_sec; sec = xtime.tv_sec;
usec += xtime.tv_usec; usec += (xtime.tv_nsec / 1000);
read_unlock_irqrestore(&xtime_lock, flags); read_unlock_irqrestore(&xtime_lock, flags);
while (usec >= 1000000) { while (usec >= 1000000) {
...@@ -309,7 +309,8 @@ void do_settimeofday(struct timeval *tv) ...@@ -309,7 +309,8 @@ void do_settimeofday(struct timeval *tv)
tv->tv_sec--; tv->tv_sec--;
} }
xtime = *tv; xtime.tv_sec = tv->tv_sec;
xtime.tv_nsec = (tv->tv_usec * 1000);
time_adjust = 0; /* stop active adjtime() */ time_adjust = 0; /* stop active adjtime() */
time_status |= STA_UNSYNC; time_status |= STA_UNSYNC;
time_maxerror = NTP_PHASE_LIMIT; time_maxerror = NTP_PHASE_LIMIT;
...@@ -437,8 +438,8 @@ static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *reg ...@@ -437,8 +438,8 @@ static inline void do_timer_interrupt(int irq, void *dev_id, struct pt_regs *reg
*/ */
if ((time_status & STA_UNSYNC) == 0 && if ((time_status & STA_UNSYNC) == 0 &&
xtime.tv_sec > last_rtc_update + 660 && xtime.tv_sec > last_rtc_update + 660 &&
xtime.tv_usec >= 500000 - ((unsigned) tick) / 2 && (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 &&
xtime.tv_usec <= 500000 + ((unsigned) tick) / 2) { (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) {
if (set_rtc_mmss(xtime.tv_sec) == 0) if (set_rtc_mmss(xtime.tv_sec) == 0)
last_rtc_update = xtime.tv_sec; last_rtc_update = xtime.tv_sec;
else else
...@@ -655,7 +656,7 @@ void __init time_init(void) ...@@ -655,7 +656,7 @@ void __init time_init(void)
extern int x86_udelay_tsc; extern int x86_udelay_tsc;
xtime.tv_sec = get_cmos_time(); xtime.tv_sec = get_cmos_time();
xtime.tv_usec = 0; xtime.tv_nsec = 0;
/* /*
* If we have APM enabled or the CPU clock speed is variable * If we have APM enabled or the CPU clock speed is variable
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
#define UDF_NAME_LEN 255 #define UDF_NAME_LEN 255
#define UDF_PATH_LEN 1023 #define UDF_PATH_LEN 1023
#define CURRENT_UTIME (xtime.tv_usec) #define CURRENT_UTIME (xtime.tv_nsec / 1000)
#define udf_file_entry_alloc_offset(inode)\ #define udf_file_entry_alloc_offset(inode)\
((UDF_I_EXTENDED_FE(inode) ?\ ((UDF_I_EXTENDED_FE(inode) ?\
......
...@@ -113,7 +113,7 @@ mktime (unsigned int year, unsigned int mon, ...@@ -113,7 +113,7 @@ mktime (unsigned int year, unsigned int mon,
)*60 + sec; /* finally seconds */ )*60 + sec; /* finally seconds */
} }
extern struct timeval xtime; extern struct timespec xtime;
#define CURRENT_TIME (xtime.tv_sec) #define CURRENT_TIME (xtime.tv_sec)
......
...@@ -155,6 +155,28 @@ ...@@ -155,6 +155,28 @@
/* LATCH is used in the interval timer and ftape setup. */ /* LATCH is used in the interval timer and ftape setup. */
#define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */ #define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */
/* Suppose we want to divide two numbers NOM and DEN: NOM/DEN, then we can
* improve accuracy by shifting LSH bits, hence calculating:
* (NOM << LSH) / DEN
* This however means trouble for large NOM, because (NOM << LSH) may no
* longer fit in 32 bits. The following way of calculating this gives us
* some slack, under the following conditions:
* - (NOM / DEN) fits in (32 - LSH) bits.
* - (NOM % DEN) fits in (32 - LSH) bits.
*/
/*
 * SH_DIV(NOM, DEN, LSH): (NOM << LSH) / DEN, rounded to nearest, computed
 * from the quotient and the remainder separately so the intermediate
 * values stay small.  Every argument is parenthesized in the expansion so
 * that expression arguments (e.g. "a + b") keep their meaning.  NOM and
 * DEN are evaluated more than once, so pass side-effect-free values.
 */
#define SH_DIV(NOM,DEN,LSH) (   (((NOM) / (DEN)) << (LSH))                \
                             + ((((NOM) % (DEN)) << (LSH)) + (DEN) / 2) / (DEN))
/* HZ is the requested value. ACTHZ is actual HZ ("<< 8" is for accuracy) */
#define ACTHZ (SH_DIV (CLOCK_TICK_RATE, LATCH, 8))
/* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
#define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
/* TICK_NSEC is the time between ticks in nsec assuming real ACTHZ and */
/* a value TUSEC for TICK_USEC (can be set by adjtimex) */
#define TICK_NSEC(TUSEC) (SH_DIV ((TUSEC) * USER_HZ * 1000, ACTHZ, 8))
/* /*
* syscall interface - used (mainly by NTP daemon) * syscall interface - used (mainly by NTP daemon)
* to discipline kernel clock oscillator * to discipline kernel clock oscillator
...@@ -251,7 +273,8 @@ struct timex { ...@@ -251,7 +273,8 @@ struct timex {
* Note: maximum error = NTP synch distance = dispersion + delay / 2; * Note: maximum error = NTP synch distance = dispersion + delay / 2;
* estimated error = NTP dispersion. * estimated error = NTP dispersion.
*/ */
extern long tick; /* timer interrupt period */ extern unsigned long tick_usec; /* USER_HZ period (usec) */
extern unsigned long tick_nsec; /* ACTHZ period (nsec) */
extern int tickadj; /* amount of adjustment per tick */ extern int tickadj; /* amount of adjustment per tick */
/* /*
......
...@@ -82,7 +82,7 @@ asmlinkage long sys_stime(int * tptr) ...@@ -82,7 +82,7 @@ asmlinkage long sys_stime(int * tptr)
return -EFAULT; return -EFAULT;
write_lock_irq(&xtime_lock); write_lock_irq(&xtime_lock);
xtime.tv_sec = value; xtime.tv_sec = value;
xtime.tv_usec = 0; xtime.tv_nsec = 0;
last_time_offset = 0; last_time_offset = 0;
time_adjust = 0; /* stop active adjtime() */ time_adjust = 0; /* stop active adjtime() */
time_status |= STA_UNSYNC; time_status |= STA_UNSYNC;
...@@ -231,7 +231,8 @@ int do_adjtimex(struct timex *txc) ...@@ -231,7 +231,8 @@ int do_adjtimex(struct timex *txc)
/* if the quartz is off by more than 10% something is VERY wrong ! */ /* if the quartz is off by more than 10% something is VERY wrong ! */
if (txc->modes & ADJ_TICK) if (txc->modes & ADJ_TICK)
if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) if (txc->tick < 900000/USER_HZ ||
txc->tick > 1100000/USER_HZ)
return -EINVAL; return -EINVAL;
write_lock_irq(&xtime_lock); write_lock_irq(&xtime_lock);
...@@ -344,13 +345,8 @@ int do_adjtimex(struct timex *txc) ...@@ -344,13 +345,8 @@ int do_adjtimex(struct timex *txc)
} /* STA_PLL || STA_PPSTIME */ } /* STA_PLL || STA_PPSTIME */
} /* txc->modes & ADJ_OFFSET */ } /* txc->modes & ADJ_OFFSET */
if (txc->modes & ADJ_TICK) { if (txc->modes & ADJ_TICK) {
/* if the quartz is off by more than 10% something is tick_usec = txc->tick;
VERY wrong ! */ tick_nsec = TICK_NSEC(tick_usec);
if (txc->tick < 900000/HZ || txc->tick > 1100000/HZ) {
result = -EINVAL;
goto leave;
}
tick = txc->tick;
} }
} /* txc->modes */ } /* txc->modes */
leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0 leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
...@@ -380,7 +376,7 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0 ...@@ -380,7 +376,7 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
txc->constant = time_constant; txc->constant = time_constant;
txc->precision = time_precision; txc->precision = time_precision;
txc->tolerance = time_tolerance; txc->tolerance = time_tolerance;
txc->tick = tick; txc->tick = tick_usec;
txc->ppsfreq = pps_freq; txc->ppsfreq = pps_freq;
txc->jitter = pps_jitter >> PPS_AVG; txc->jitter = pps_jitter >> PPS_AVG;
txc->shift = pps_shift; txc->shift = pps_shift;
......
...@@ -33,10 +33,11 @@ struct kernel_stat kstat; ...@@ -33,10 +33,11 @@ struct kernel_stat kstat;
* Timekeeping variables * Timekeeping variables
*/ */
long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */ unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */
unsigned long tick_nsec = TICK_NSEC(TICK_USEC); /* ACTHZ period (nsec) */
/* The current time */ /* The current time */
struct timeval xtime __attribute__ ((aligned (16))); struct timespec xtime __attribute__ ((aligned (16)));
/* Don't completely fail for HZ > 500. */ /* Don't completely fail for HZ > 500. */
int tickadj = 500/HZ ? : 1; /* microsecs */ int tickadj = 500/HZ ? : 1; /* microsecs */
...@@ -63,7 +64,6 @@ long time_adj; /* tick adjust (scaled 1 / HZ) */ ...@@ -63,7 +64,6 @@ long time_adj; /* tick adjust (scaled 1 / HZ) */
long time_reftime; /* time at last adjustment (s) */ long time_reftime; /* time at last adjustment (s) */
long time_adjust; long time_adjust;
long time_adjust_step;
unsigned long event; unsigned long event;
...@@ -465,6 +465,8 @@ static void second_overflow(void) ...@@ -465,6 +465,8 @@ static void second_overflow(void)
/* in the NTP reference this is called "hardclock()" */ /* in the NTP reference this is called "hardclock()" */
static void update_wall_time_one_tick(void) static void update_wall_time_one_tick(void)
{ {
long time_adjust_step;
if ( (time_adjust_step = time_adjust) != 0 ) { if ( (time_adjust_step = time_adjust) != 0 ) {
/* We are doing an adjtime thing. /* We are doing an adjtime thing.
* *
...@@ -483,21 +485,21 @@ static void update_wall_time_one_tick(void) ...@@ -483,21 +485,21 @@ static void update_wall_time_one_tick(void)
/* Reduce by this step the amount of time left */ /* Reduce by this step the amount of time left */
time_adjust -= time_adjust_step; time_adjust -= time_adjust_step;
} }
xtime.tv_usec += tick + time_adjust_step; xtime.tv_nsec += tick_nsec + time_adjust_step * 1000;
/* /*
* Advance the phase, once it gets to one microsecond, then * Advance the phase, once it gets to one microsecond, then
* advance the tick more. * advance the tick more.
*/ */
time_phase += time_adj; time_phase += time_adj;
if (time_phase <= -FINEUSEC) { if (time_phase <= -FINEUSEC) {
long ltemp = -time_phase >> SHIFT_SCALE; long ltemp = -time_phase >> (SHIFT_SCALE - 10);
time_phase += ltemp << SHIFT_SCALE; time_phase += ltemp << (SHIFT_SCALE - 10);
xtime.tv_usec -= ltemp; xtime.tv_nsec -= ltemp;
} }
else if (time_phase >= FINEUSEC) { else if (time_phase >= FINEUSEC) {
long ltemp = time_phase >> SHIFT_SCALE; long ltemp = time_phase >> (SHIFT_SCALE - 10);
time_phase -= ltemp << SHIFT_SCALE; time_phase -= ltemp << (SHIFT_SCALE - 10);
xtime.tv_usec += ltemp; xtime.tv_nsec += ltemp;
} }
} }
...@@ -515,8 +517,8 @@ static void update_wall_time(unsigned long ticks) ...@@ -515,8 +517,8 @@ static void update_wall_time(unsigned long ticks)
update_wall_time_one_tick(); update_wall_time_one_tick();
} while (ticks); } while (ticks);
if (xtime.tv_usec >= 1000000) { if (xtime.tv_nsec >= 1000000000) {
xtime.tv_usec -= 1000000; xtime.tv_nsec -= 1000000000;
xtime.tv_sec++; xtime.tv_sec++;
second_overflow(); second_overflow();
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment