Commit 4e6d7c2a authored by Ingo Molnar

Merge branch 'timers/core' into perf/timer, to apply dependent patch

An upcoming patch will depend on tai_ns() and NMI-safe ktime_get_raw_fast(),
so merge timers/core here in a separate topic branch until it's all cooked
and timers/core is merged upstream.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 3c435c1e fe5fba05
......@@ -103,7 +103,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
/*
* 120000 rough estimate from the calculations in
* __clocksource_updatefreq_scale.
* __clocksource_update_freq_scale.
*/
clocks_calc_mult_shift(&persistent_mult, &persistent_shift,
32768, NSEC_PER_SEC, 120000);
......
......@@ -200,7 +200,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
void update_vsyscall(struct timekeeper *tk)
{
struct timespec xtime_coarse;
u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter");
u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
++vdso_data->tb_seq_count;
smp_wmb();
......@@ -213,11 +213,11 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
if (!use_syscall) {
vdso_data->cs_cycle_last = tk->tkr.cycle_last;
vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
vdso_data->cs_mult = tk->tkr.mult;
vdso_data->cs_shift = tk->tkr.shift;
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
vdso_data->cs_mult = tk->tkr_mono.mult;
vdso_data->cs_shift = tk->tkr_mono.shift;
}
smp_wmb();
......
......@@ -215,20 +215,20 @@ void update_vsyscall(struct timekeeper *tk)
{
u64 nsecps;
if (tk->tkr.clock != &clocksource_tod)
if (tk->tkr_mono.clock != &clocksource_tod)
return;
/* Make userspace gettimeofday spin until we're done. */
++vdso_data->tb_update_count;
smp_wmb();
vdso_data->xtime_tod_stamp = tk->tkr.cycle_last;
vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
vdso_data->wtom_clock_sec =
tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec +
+ ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift);
nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift;
vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
+ ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
while (vdso_data->wtom_clock_nsec >= nsecps) {
vdso_data->wtom_clock_nsec -= nsecps;
vdso_data->wtom_clock_sec++;
......@@ -236,7 +236,7 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->xtime_coarse_sec = tk->xtime_sec;
vdso_data->xtime_coarse_nsec =
(long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
(long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
vdso_data->wtom_coarse_sec =
vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
vdso_data->wtom_coarse_nsec =
......@@ -246,8 +246,8 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->wtom_coarse_sec++;
}
vdso_data->tk_mult = tk->tkr.mult;
vdso_data->tk_shift = tk->tkr.shift;
vdso_data->tk_mult = tk->tkr_mono.mult;
vdso_data->tk_shift = tk->tkr_mono.shift;
smp_wmb();
++vdso_data->tb_update_count;
}
......@@ -283,7 +283,7 @@ void __init time_init(void)
if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
panic("Couldn't request external interrupt 0x1406");
if (clocksource_register(&clocksource_tod) != 0)
if (__clocksource_register(&clocksource_tod) != 0)
panic("Could not register TOD clock source");
/* Enable TOD clock interrupts on the boot cpu. */
......
......@@ -181,17 +181,13 @@ static struct clocksource timer_cs = {
.rating = 100,
.read = timer_cs_read,
.mask = CLOCKSOURCE_MASK(64),
.shift = 2,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
static __init int setup_timer_cs(void)
{
timer_cs_enabled = 1;
timer_cs.mult = clocksource_hz2mult(sparc_config.clock_rate,
timer_cs.shift);
return clocksource_register(&timer_cs);
return clocksource_register_hz(&timer_cs, sparc_config.clock_rate);
}
#ifdef CONFIG_SMP
......
......@@ -257,34 +257,34 @@ void update_vsyscall_tz(void)
void update_vsyscall(struct timekeeper *tk)
{
if (tk->tkr.clock != &cycle_counter_cs)
if (tk->tkr_mono.clock != &cycle_counter_cs)
return;
write_seqcount_begin(&vdso_data->tb_seq);
vdso_data->cycle_last = tk->tkr.cycle_last;
vdso_data->mask = tk->tkr.mask;
vdso_data->mult = tk->tkr.mult;
vdso_data->shift = tk->tkr.shift;
vdso_data->cycle_last = tk->tkr_mono.cycle_last;
vdso_data->mask = tk->tkr_mono.mask;
vdso_data->mult = tk->tkr_mono.mult;
vdso_data->shift = tk->tkr_mono.shift;
vdso_data->wall_time_sec = tk->xtime_sec;
vdso_data->wall_time_snsec = tk->tkr.xtime_nsec;
vdso_data->wall_time_snsec = tk->tkr_mono.xtime_nsec;
vdso_data->monotonic_time_sec = tk->xtime_sec
+ tk->wall_to_monotonic.tv_sec;
vdso_data->monotonic_time_snsec = tk->tkr.xtime_nsec
vdso_data->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
+ ((u64)tk->wall_to_monotonic.tv_nsec
<< tk->tkr.shift);
<< tk->tkr_mono.shift);
while (vdso_data->monotonic_time_snsec >=
(((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
(((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
vdso_data->monotonic_time_snsec -=
((u64)NSEC_PER_SEC) << tk->tkr.shift;
((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
vdso_data->monotonic_time_sec++;
}
vdso_data->wall_time_coarse_sec = tk->xtime_sec;
vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
tk->tkr.shift);
vdso_data->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
tk->tkr_mono.shift);
vdso_data->monotonic_time_coarse_sec =
vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
......
......@@ -31,30 +31,30 @@ void update_vsyscall(struct timekeeper *tk)
gtod_write_begin(vdata);
/* copy vsyscall data */
vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode;
vdata->cycle_last = tk->tkr.cycle_last;
vdata->mask = tk->tkr.mask;
vdata->mult = tk->tkr.mult;
vdata->shift = tk->tkr.shift;
vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
vdata->cycle_last = tk->tkr_mono.cycle_last;
vdata->mask = tk->tkr_mono.mask;
vdata->mult = tk->tkr_mono.mult;
vdata->shift = tk->tkr_mono.shift;
vdata->wall_time_sec = tk->xtime_sec;
vdata->wall_time_snsec = tk->tkr.xtime_nsec;
vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec;
vdata->monotonic_time_sec = tk->xtime_sec
+ tk->wall_to_monotonic.tv_sec;
vdata->monotonic_time_snsec = tk->tkr.xtime_nsec
vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
+ ((u64)tk->wall_to_monotonic.tv_nsec
<< tk->tkr.shift);
<< tk->tkr_mono.shift);
while (vdata->monotonic_time_snsec >=
(((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
(((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
vdata->monotonic_time_snsec -=
((u64)NSEC_PER_SEC) << tk->tkr.shift;
((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
vdata->monotonic_time_sec++;
}
vdata->wall_time_coarse_sec = tk->xtime_sec;
vdata->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
tk->tkr.shift);
vdata->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
tk->tkr_mono.shift);
vdata->monotonic_time_coarse_sec =
vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
......
......@@ -1070,19 +1070,19 @@ static void update_pvclock_gtod(struct timekeeper *tk)
struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
u64 boot_ns;
boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot));
boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
write_seqcount_begin(&vdata->seq);
/* copy pvclock gtod data */
vdata->clock.vclock_mode = tk->tkr.clock->archdata.vclock_mode;
vdata->clock.cycle_last = tk->tkr.cycle_last;
vdata->clock.mask = tk->tkr.mask;
vdata->clock.mult = tk->tkr.mult;
vdata->clock.shift = tk->tkr.shift;
vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
vdata->clock.mask = tk->tkr_mono.mask;
vdata->clock.mult = tk->tkr_mono.mult;
vdata->clock.shift = tk->tkr_mono.shift;
vdata->boot_ns = boot_ns;
vdata->nsec_base = tk->tkr.xtime_nsec;
vdata->nsec_base = tk->tkr_mono.xtime_nsec;
write_seqcount_end(&vdata->seq);
}
......
......@@ -210,7 +210,7 @@ static int em_sti_clocksource_enable(struct clocksource *cs)
ret = em_sti_start(p, USER_CLOCKSOURCE);
if (!ret)
__clocksource_updatefreq_hz(cs, p->rate);
__clocksource_update_freq_hz(cs, p->rate);
return ret;
}
......
......@@ -641,7 +641,7 @@ static int sh_cmt_clocksource_enable(struct clocksource *cs)
ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE);
if (!ret) {
__clocksource_updatefreq_hz(cs, ch->rate);
__clocksource_update_freq_hz(cs, ch->rate);
ch->cs_enabled = true;
}
return ret;
......
......@@ -272,7 +272,7 @@ static int sh_tmu_clocksource_enable(struct clocksource *cs)
ret = sh_tmu_enable(ch);
if (!ret) {
__clocksource_updatefreq_hz(cs, ch->rate);
__clocksource_update_freq_hz(cs, ch->rate);
ch->cs_enabled = true;
}
......
......@@ -39,6 +39,8 @@ enum clock_event_mode {
CLOCK_EVT_MODE_PERIODIC,
CLOCK_EVT_MODE_ONESHOT,
CLOCK_EVT_MODE_RESUME,
/* Legacy ->set_mode() callback doesn't support below modes */
};
/*
......@@ -81,7 +83,11 @@ enum clock_event_mode {
* @mode: operating mode assigned by the management code
* @features: features
* @retries: number of forced programming retries
* @set_mode: set mode function
* @set_mode: legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
* @set_mode_periodic: switch mode to periodic, if !set_mode
* @set_mode_oneshot: switch mode to oneshot, if !set_mode
* @set_mode_shutdown: switch mode to shutdown, if !set_mode
* @set_mode_resume: resume clkevt device, if !set_mode
* @broadcast: function to broadcast events
* @min_delta_ticks: minimum delta value in ticks stored for reconfiguration
* @max_delta_ticks: maximum delta value in ticks stored for reconfiguration
......@@ -108,9 +114,20 @@ struct clock_event_device {
unsigned int features;
unsigned long retries;
void (*broadcast)(const struct cpumask *mask);
/*
* Mode transition callback(s): Only one of the two groups should be
* defined:
* - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.
* - set_mode_{shutdown|periodic|oneshot|resume}().
*/
void (*set_mode)(enum clock_event_mode mode,
struct clock_event_device *);
int (*set_mode_periodic)(struct clock_event_device *);
int (*set_mode_oneshot)(struct clock_event_device *);
int (*set_mode_shutdown)(struct clock_event_device *);
int (*set_mode_resume)(struct clock_event_device *);
void (*broadcast)(const struct cpumask *mask);
void (*suspend)(struct clock_event_device *);
void (*resume)(struct clock_event_device *);
unsigned long min_delta_ticks;
......
......@@ -56,6 +56,7 @@ struct module;
* @shift: cycle to nanosecond divisor (power of two)
* @max_idle_ns: max idle time permitted by the clocksource (nsecs)
* @maxadj: maximum adjustment value to mult (~11%)
* @max_cycles: maximum safe cycle value which won't overflow on multiplication
* @flags: flags describing special properties
* @archdata: arch-specific data
* @suspend: suspend function for the clocksource, if necessary
......@@ -76,7 +77,7 @@ struct clocksource {
#ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
struct arch_clocksource_data archdata;
#endif
u64 max_cycles;
const char *name;
struct list_head list;
int rating;
......@@ -178,7 +179,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
}
extern int clocksource_register(struct clocksource*);
extern int clocksource_unregister(struct clocksource*);
extern void clocksource_touch_watchdog(void);
extern struct clocksource* clocksource_get_next(void);
......@@ -189,7 +189,7 @@ extern struct clocksource * __init clocksource_default_clock(void);
extern void clocksource_mark_unstable(struct clocksource *cs);
extern u64
clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask);
clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles);
extern void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
......@@ -200,7 +200,16 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
extern int
__clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq);
extern void
__clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq);
__clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq);
/*
* __clocksource_register - register a clocksource without a frequency
* @cs: clocksource to be registered
*
* Forwards to __clocksource_register_scale() with a scale of 1 and a freq
* of 0 and returns its result. A freq of 0 makes
* __clocksource_update_freq_scale() skip the mult/shift calculation, so
* @cs is expected to carry its own mult and shift values.
*
* Don't call this unless you are a default clocksource
* (AKA: jiffies) and absolutely have to.
*/
static inline int __clocksource_register(struct clocksource *cs)
{
return __clocksource_register_scale(cs, 1, 0);
}
static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
{
......@@ -212,14 +221,14 @@ static inline int clocksource_register_khz(struct clocksource *cs, u32 khz)
return __clocksource_register_scale(cs, 1000, khz);
}
static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz)
static inline void __clocksource_update_freq_hz(struct clocksource *cs, u32 hz)
{
__clocksource_updatefreq_scale(cs, 1, hz);
__clocksource_update_freq_scale(cs, 1, hz);
}
static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz)
static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz)
{
__clocksource_updatefreq_scale(cs, 1000, khz);
__clocksource_update_freq_scale(cs, 1000, khz);
}
......
......@@ -16,16 +16,16 @@
* @read: Read function of @clock
* @mask: Bitmask for two's complement subtraction of non 64bit clocks
* @cycle_last: @clock cycle value at last update
* @mult: NTP adjusted multiplier for scaled math conversion
* @mult: (NTP adjusted) multiplier for scaled math conversion
* @shift: Shift value for scaled math conversion
* @xtime_nsec: Shifted (fractional) nano seconds offset for readout
* @base_mono: ktime_t (nanoseconds) base time for readout
* @base: ktime_t (nanoseconds) base time for readout
*
* This struct has size 56 byte on 64 bit. Together with a seqcount it
* occupies a single 64byte cache line.
*
* The struct is separate from struct timekeeper as it is also used
* for a fast NMI safe accessor to clock monotonic.
* for fast NMI safe accessors.
*/
struct tk_read_base {
struct clocksource *clock;
......@@ -35,12 +35,13 @@ struct tk_read_base {
u32 mult;
u32 shift;
u64 xtime_nsec;
ktime_t base_mono;
ktime_t base;
};
/**
* struct timekeeper - Structure holding internal timekeeping values.
* @tkr: The readout base structure
* @tkr_mono: The readout base structure for CLOCK_MONOTONIC
* @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
* @xtime_sec: Current CLOCK_REALTIME time in seconds
* @ktime_sec: Current CLOCK_MONOTONIC time in seconds
* @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
......@@ -48,7 +49,6 @@ struct tk_read_base {
* @offs_boot: Offset clock monotonic -> clock boottime
* @offs_tai: Offset clock monotonic -> clock tai
* @tai_offset: The current UTC to TAI offset in seconds
* @base_raw: Monotonic raw base time in ktime_t format
* @raw_time: Monotonic raw base time in timespec64 format
* @cycle_interval: Number of clock cycles in one NTP interval
* @xtime_interval: Number of clock shifted nano seconds in one NTP
......@@ -76,7 +76,8 @@ struct tk_read_base {
* used instead.
*/
struct timekeeper {
struct tk_read_base tkr;
struct tk_read_base tkr_mono;
struct tk_read_base tkr_raw;
u64 xtime_sec;
unsigned long ktime_sec;
struct timespec64 wall_to_monotonic;
......@@ -84,7 +85,6 @@ struct timekeeper {
ktime_t offs_boot;
ktime_t offs_tai;
s32 tai_offset;
ktime_t base_raw;
struct timespec64 raw_time;
/* The following members are for timekeeping internal use */
......
......@@ -214,12 +214,18 @@ static inline u64 ktime_get_boot_ns(void)
return ktime_to_ns(ktime_get_boottime());
}
/*
* ktime_get_tai_ns - ktime_get_clocktai() expressed in nanoseconds
*
* Returns the value of ktime_get_clocktai() converted with ktime_to_ns().
*/
static inline u64 ktime_get_tai_ns(void)
{
return ktime_to_ns(ktime_get_clocktai());
}
/*
* ktime_get_raw_ns - ktime_get_raw() expressed in nanoseconds
*
* Returns the value of ktime_get_raw() converted with ktime_to_ns().
*/
static inline u64 ktime_get_raw_ns(void)
{
return ktime_to_ns(ktime_get_raw());
}
extern u64 ktime_get_mono_fast_ns(void);
extern u64 ktime_get_raw_fast_ns(void);
/*
* Timespec interfaces utilizing the ktime based ones
......
......@@ -94,6 +94,57 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
}
EXPORT_SYMBOL_GPL(clockevent_delta2ns);
/*
 * __clockevents_set_mode - ask the driver to switch @dev to @mode
 * @dev:	device whose mode is to be changed
 * @mode:	requested operating mode
 *
 * Uses the legacy ->set_mode() callback when the driver provides one,
 * otherwise dispatches to the matching mode-specific callback.
 *
 * Returns 0 on success, -ENOSYS when the transition cannot be performed
 * with the callbacks/features the device offers, or whatever the
 * mode-specific callback returns. dev->mode is not touched here.
 */
static int __clockevents_set_mode(struct clock_event_device *dev,
				  enum clock_event_mode mode)
{
	int ret = 0;

	/* Legacy drivers: a single callback handles all the old modes */
	if (dev->set_mode) {
		if (mode <= CLOCK_EVT_MODE_RESUME) {
			dev->set_mode(mode, dev);
			return 0;
		}
		/* Anything newer than RESUME is unknown to ->set_mode() */
		return -ENOSYS;
	}

	/* Dummy devices have nothing to program */
	if (dev->features & CLOCK_EVT_FEAT_DUMMY)
		return 0;

	/* Drivers using the per-mode callbacks */
	switch (mode) {
	case CLOCK_EVT_MODE_UNUSED:
		/*
		 * Internal state only, entered from SHUTDOWN; the driver
		 * is not involved.
		 */
		break;

	case CLOCK_EVT_MODE_SHUTDOWN:
		/*
		 * Not NULL-checked here: clockevents_sanity_check() rejects
		 * such devices when they are registered.
		 */
		ret = dev->set_mode_shutdown(dev);
		break;

	case CLOCK_EVT_MODE_PERIODIC:
		/* A request without the feature bit is a core internal bug */
		ret = (dev->features & CLOCK_EVT_FEAT_PERIODIC) ?
			dev->set_mode_periodic(dev) : -ENOSYS;
		break;

	case CLOCK_EVT_MODE_ONESHOT:
		/* A request without the feature bit is a core internal bug */
		ret = (dev->features & CLOCK_EVT_FEAT_ONESHOT) ?
			dev->set_mode_oneshot(dev) : -ENOSYS;
		break;

	case CLOCK_EVT_MODE_RESUME:
		/* The resume callback is optional */
		if (dev->set_mode_resume)
			ret = dev->set_mode_resume(dev);
		break;

	default:
		ret = -ENOSYS;
		break;
	}

	return ret;
}
/**
* clockevents_set_mode - set the operating mode of a clock event device
* @dev: device to modify
......@@ -105,7 +156,9 @@ void clockevents_set_mode(struct clock_event_device *dev,
enum clock_event_mode mode)
{
if (dev->mode != mode) {
dev->set_mode(mode, dev);
if (__clockevents_set_mode(dev, mode))
return;
dev->mode = mode;
/*
......@@ -373,6 +426,35 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
}
EXPORT_SYMBOL_GPL(clockevents_unbind);
/*
 * clockevents_sanity_check - validate the mode transition callbacks of @dev
 * @dev:	device about to be registered
 *
 * A device either provides the legacy ->set_mode() callback or the
 * mode-specific ->set_mode_*() callbacks. Returns 0 when the callbacks
 * are usable, -EINVAL when a required mode-specific callback is missing.
 */
static int clockevents_sanity_check(struct clock_event_device *dev)
{
	/* Legacy drivers: ->set_mode() alone drives every transition */
	if (dev->set_mode) {
		/* Defining both groups is unexpected; warn but accept */
		WARN_ON(dev->set_mode_periodic || dev->set_mode_oneshot ||
			dev->set_mode_shutdown || dev->set_mode_resume);
		return 0;
	}

	/* Dummy devices are not required to provide any callback */
	if (dev->features & CLOCK_EVT_FEAT_DUMMY)
		return 0;

	/*
	 * Mode-specific callbacks: shutdown is mandatory, periodic and
	 * oneshot are mandatory only when the matching feature is
	 * advertised, resume is optional.
	 */
	if (!dev->set_mode_shutdown ||
	    ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
	     !dev->set_mode_periodic) ||
	    ((dev->features & CLOCK_EVT_FEAT_ONESHOT) &&
	     !dev->set_mode_oneshot))
		return -EINVAL;

	return 0;
}
/**
* clockevents_register_device - register a clock event device
* @dev: device to register
......@@ -382,6 +464,8 @@ void clockevents_register_device(struct clock_event_device *dev)
unsigned long flags;
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
BUG_ON(clockevents_sanity_check(dev));
if (!dev->cpumask) {
WARN_ON(num_possible_cpus() > 1);
dev->cpumask = cpumask_of(smp_processor_id());
......@@ -449,7 +533,7 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq)
return clockevents_program_event(dev, dev->next_event, false);
if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev);
return __clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
return 0;
}
......
......@@ -142,13 +142,6 @@ static void __clocksource_unstable(struct clocksource *cs)
schedule_work(&watchdog_work);
}
static void clocksource_unstable(struct clocksource *cs, int64_t delta)
{
printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
cs->name, delta);
__clocksource_unstable(cs);
}
/**
* clocksource_mark_unstable - mark clocksource unstable via watchdog
* @cs: clocksource to be marked unstable
......@@ -174,7 +167,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
static void clocksource_watchdog(unsigned long data)
{
struct clocksource *cs;
cycle_t csnow, wdnow, delta;
cycle_t csnow, wdnow, cslast, wdlast, delta;
int64_t wd_nsec, cs_nsec;
int next_cpu, reset_pending;
......@@ -213,6 +206,8 @@ static void clocksource_watchdog(unsigned long data)
delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
wdlast = cs->wd_last; /* save these in case we print them */
cslast = cs->cs_last;
cs->cs_last = csnow;
cs->wd_last = wdnow;
......@@ -221,7 +216,12 @@ static void clocksource_watchdog(unsigned long data)
/* Check the deviation from the watchdog clocksource. */
if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
clocksource_unstable(cs, cs_nsec - wd_nsec);
pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name);
pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
watchdog->name, wdnow, wdlast, watchdog->mask);
pr_warn(" '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
cs->name, csnow, cslast, cs->mask);
__clocksource_unstable(cs);
continue;
}
......@@ -469,26 +469,22 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
* @shift: cycle to nanosecond divisor (power of two)
* @maxadj: maximum adjustment value to mult (~11%)
* @mask: bitmask for two's complement subtraction of non 64 bit counters
* @max_cyc: maximum cycle value before potential overflow (does not include
* any safety margin)
*
* NOTE: This function includes a safety margin of 50%, so that bad clock values
* can be detected.
*/
u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
{
u64 max_nsecs, max_cycles;
/*
* Calculate the maximum number of cycles that we can pass to the
* cyc2ns function without overflowing a 64-bit signed result. The
* maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
* which is equivalent to the below.
* max_cycles < (2^63)/(mult + maxadj)
* max_cycles < 2^(log2((2^63)/(mult + maxadj)))
* max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
* max_cycles < 2^(63 - log2(mult + maxadj))
* max_cycles < 1 << (63 - log2(mult + maxadj))
* Please note that we add 1 to the result of the log2 to account for
* any rounding errors, ensure the above inequality is satisfied and
* no overflow will occur.
* cyc2ns() function without overflowing a 64-bit result.
*/
max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));
max_cycles = ULLONG_MAX;
do_div(max_cycles, mult+maxadj);
/*
* The actual maximum number of cycles we can defer the clocksource is
......@@ -499,27 +495,26 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
max_cycles = min(max_cycles, mask);
max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
/* return the max_cycles value as well if requested */
if (max_cyc)
*max_cyc = max_cycles;
/* Return 50% of the actual maximum, so we can detect bad values */
max_nsecs >>= 1;
return max_nsecs;
}
/**
* clocksource_max_deferment - Returns max time the clocksource can be deferred
* @cs: Pointer to clocksource
* clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
* @cs: Pointer to clocksource to be updated
*
*/
static u64 clocksource_max_deferment(struct clocksource *cs)
static inline void clocksource_update_max_deferment(struct clocksource *cs)
{
u64 max_nsecs;
max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
cs->mask);
/*
* To ensure that the clocksource does not wrap whilst we are idle,
* limit the time the clocksource can be deferred by 12.5%. Please
* note a margin of 12.5% is used because this can be computed with
* a shift, versus say 10% which would require division.
*/
return max_nsecs - (max_nsecs >> 3);
cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
cs->maxadj, cs->mask,
&cs->max_cycles);
}
#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
......@@ -648,7 +643,7 @@ static void clocksource_enqueue(struct clocksource *cs)
}
/**
* __clocksource_updatefreq_scale - Used update clocksource with new freq
* __clocksource_update_freq_scale - Used to update clocksource with new freq
* @cs: clocksource to be registered
* @scale: Scale factor multiplied against freq to get clocksource hz
* @freq: clocksource frequency (cycles per second) divided by scale
......@@ -656,48 +651,64 @@ static void clocksource_enqueue(struct clocksource *cs)
* This should only be called from the clocksource->enable() method.
*
* This *SHOULD NOT* be called directly! Please use the
* clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions.
* __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
* functions.
*/
void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
{
u64 sec;
/*
* Calc the maximum number of seconds which we can run before
* wrapping around. For clocksources which have a mask > 32bit
* we need to limit the max sleep time to have a good
* conversion precision. 10 minutes is still a reasonable
* amount. That results in a shift value of 24 for a
* clocksource with mask >= 40bit and f >= 4GHz. That maps to
* ~ 0.06ppm granularity for NTP. We apply the same 12.5%
* margin as we do in clocksource_max_deferment()
* Default clocksources are *special* and self-define their mult/shift.
* But, you're not special, so you should specify a freq value.
*/
sec = (cs->mask - (cs->mask >> 3));
do_div(sec, freq);
do_div(sec, scale);
if (!sec)
sec = 1;
else if (sec > 600 && cs->mask > UINT_MAX)
sec = 600;
clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
NSEC_PER_SEC / scale, sec * scale);
if (freq) {
/*
* Calc the maximum number of seconds which we can run before
* wrapping around. For clocksources which have a mask > 32-bit
* we need to limit the max sleep time to have a good
* conversion precision. 10 minutes is still a reasonable
* amount. That results in a shift value of 24 for a
* clocksource with mask >= 40-bit and f >= 4GHz. That maps to
* ~ 0.06ppm granularity for NTP.
*/
sec = cs->mask;
do_div(sec, freq);
do_div(sec, scale);
if (!sec)
sec = 1;
else if (sec > 600 && cs->mask > UINT_MAX)
sec = 600;
clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
NSEC_PER_SEC / scale, sec * scale);
}
/*
* for clocksources that have large mults, to avoid overflow.
* Since mult may be adjusted by ntp, add an safety extra margin
*
* Ensure clocksources that have large 'mult' values don't overflow
* when adjusted.
*/
cs->maxadj = clocksource_max_adjustment(cs);
while ((cs->mult + cs->maxadj < cs->mult)
|| (cs->mult - cs->maxadj > cs->mult)) {
while (freq && ((cs->mult + cs->maxadj < cs->mult)
|| (cs->mult - cs->maxadj > cs->mult))) {
cs->mult >>= 1;
cs->shift--;
cs->maxadj = clocksource_max_adjustment(cs);
}
cs->max_idle_ns = clocksource_max_deferment(cs);
/*
* Only warn for *special* clocksources that self-define
* their mult/shift values and don't specify a freq.
*/
WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
"timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
cs->name);
clocksource_update_max_deferment(cs);
pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
}
EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
/**
* __clocksource_register_scale - Used to install new clocksources
......@@ -714,7 +725,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{
/* Initialize mult/shift and max_idle_ns */
__clocksource_updatefreq_scale(cs, scale, freq);
__clocksource_update_freq_scale(cs, scale, freq);
/* Add clocksource to the clocksource list */
mutex_lock(&clocksource_mutex);
......@@ -726,33 +737,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
}
EXPORT_SYMBOL_GPL(__clocksource_register_scale);
/**
* clocksource_register - Used to install new clocksources
* @cs: clocksource to be registered
*
* Returns -EBUSY if registration fails, zero otherwise.
*/
int clocksource_register(struct clocksource *cs)
{
/* calculate max adjustment for given mult/shift */
cs->maxadj = clocksource_max_adjustment(cs);
WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
"Clocksource %s might overflow on 11%% adjustment\n",
cs->name);
/* calculate max idle time permitted for this clocksource */
cs->max_idle_ns = clocksource_max_deferment(cs);
mutex_lock(&clocksource_mutex);
clocksource_enqueue(cs);
clocksource_enqueue_watchdog(cs);
clocksource_select();
mutex_unlock(&clocksource_mutex);
return 0;
}
EXPORT_SYMBOL(clocksource_register);
static void __clocksource_change_rating(struct clocksource *cs, int rating)
{
list_del(&cs->list);
......
......@@ -71,6 +71,7 @@ static struct clocksource clocksource_jiffies = {
.mask = 0xffffffff, /*32bits*/
.mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
.shift = JIFFIES_SHIFT,
.max_cycles = 10,
};
__cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
......@@ -94,7 +95,7 @@ EXPORT_SYMBOL(jiffies);
static int __init init_jiffies_clocksource(void)
{
return clocksource_register(&clocksource_jiffies);
return __clocksource_register(&clocksource_jiffies);
}
core_initcall(init_jiffies_clocksource);
......@@ -130,6 +131,6 @@ int register_refined_jiffies(long cycles_per_second)
refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
clocksource_register(&refined_jiffies);
__clocksource_register(&refined_jiffies);
return 0;
}
/*
* sched_clock.c: support for extending counters to full 64-bit ns counter
* sched_clock.c: Generic sched_clock() support, to extend low level
* hardware time counters to full 64-bit ns values.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
......@@ -18,15 +19,53 @@
#include <linux/seqlock.h>
#include <linux/bitops.h>
struct clock_data {
ktime_t wrap_kt;
/**
* struct clock_read_data - data required to read from sched_clock()
*
* @epoch_ns: sched_clock() value at last update
* @epoch_cyc: Clock cycle value at last update.
* @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit
* clocks.
* @read_sched_clock: Current clock source (or dummy source when suspended).
* @mult: Multiplier for scaled math conversion.
* @shift: Shift value for scaled math conversion.
*
* Care must be taken when updating this structure; it is read by
* some very hot code paths. It occupies <=40 bytes and, when combined
* with the seqcount used to synchronize access, comfortably fits into
* a 64 byte cache line.
*/
struct clock_read_data {
u64 epoch_ns;
u64 epoch_cyc;
seqcount_t seq;
unsigned long rate;
u64 sched_clock_mask;
u64 (*read_sched_clock)(void);
u32 mult;
u32 shift;
bool suspended;
};
/**
* struct clock_data - all data needed for sched_clock() (including
* registration of a new clock source)
*
* @seq: Sequence counter for protecting updates. The lowest
* bit is the index for @read_data.
* @read_data: Data required to read from sched_clock.
* @wrap_kt: Duration for which clock can run before wrapping.
* @rate: Tick rate of the registered clock.
* @actual_read_sched_clock: Registered hardware level clock read function.
*
* The ordering of this structure has been chosen to optimize cache
* performance. In particular 'seq' and 'read_data[0]' (combined) should fit
* into a single 64-byte cache line.
*/
struct clock_data {
seqcount_t seq;
struct clock_read_data read_data[2];
ktime_t wrap_kt;
unsigned long rate;
u64 (*actual_read_sched_clock)(void);
};
static struct hrtimer sched_clock_timer;
......@@ -34,12 +73,6 @@ static int irqtime = -1;
core_param(irqtime, irqtime, int, 0400);
static struct clock_data cd = {
.mult = NSEC_PER_SEC / HZ,
};
static u64 __read_mostly sched_clock_mask;
static u64 notrace jiffy_sched_clock_read(void)
{
/*
......@@ -49,7 +82,11 @@ static u64 notrace jiffy_sched_clock_read(void)
return (u64)(jiffies - INITIAL_JIFFIES);
}
static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
static struct clock_data cd ____cacheline_aligned = {
.read_data[0] = { .mult = NSEC_PER_SEC / HZ,
.read_sched_clock = jiffy_sched_clock_read, },
.actual_read_sched_clock = jiffy_sched_clock_read,
};
static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
{
......@@ -58,111 +95,136 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
/*
 * sched_clock - return the current scheduler clock in nanoseconds.
 *
 * The low bit of cd.seq selects which copy of the read data to use, so a
 * reader always works on one self-consistent copy even while
 * update_clock_read_data() is rewriting the other one. If the sequence
 * count changed while we were computing, the result is thrown away and the
 * read is retried.
 *
 * Returns epoch_ns plus the nanoseconds elapsed since epoch_cyc.
 */
unsigned long long notrace sched_clock(void)
{
	u64 cyc, res;
	unsigned long seq;
	struct clock_read_data *rd;

	do {
		seq = raw_read_seqcount(&cd.seq);
		rd = cd.read_data + (seq & 1);

		/* masked delta so a wrapped counter still yields a sane value */
		cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
		      rd->sched_clock_mask;
		res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
	} while (read_seqcount_retry(&cd.seq, seq));

	return res;
}
/*
* Updating the data required to read the clock.
*
* sched_clock() will never observe mis-matched data even if called from
* an NMI. We do this by maintaining an odd/even copy of the data and
* steering sched_clock() to one or the other using a sequence counter.
* In order to preserve the data cache profile of sched_clock() as much
* as possible the system reverts back to the even copy when the update
* completes; the odd copy is used *only* during an update.
*/
/*
 * @rd: complete replacement read data; it is copied by value into both slots
 * of cd.read_data[]. The statement order below is what lets sched_clock()
 * (which indexes cd.read_data[] with "seq & 1") avoid the slot being written.
 */
static void update_clock_read_data(struct clock_read_data *rd)
{
/* update the backup (odd) copy with the new data */
cd.read_data[1] = *rd;
/* steer readers towards the odd copy */
raw_write_seqcount_latch(&cd.seq);
/* now it's safe for us to update the normal (even) copy */
cd.read_data[0] = *rd;
/* switch readers back to the even copy */
raw_write_seqcount_latch(&cd.seq);
}
/*
* Atomically update the sched_clock epoch.
* Atomically update the sched_clock() epoch.
*/
static void notrace update_sched_clock(void)
static void update_sched_clock(void)
{
unsigned long flags;
u64 cyc;
u64 ns;
struct clock_read_data rd;
rd = cd.read_data[0];
cyc = cd.actual_read_sched_clock();
ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
rd.epoch_ns = ns;
rd.epoch_cyc = cyc;
cyc = read_sched_clock();
ns = cd.epoch_ns +
cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
cd.mult, cd.shift);
raw_local_irq_save(flags);
raw_write_seqcount_begin(&cd.seq);
cd.epoch_ns = ns;
cd.epoch_cyc = cyc;
raw_write_seqcount_end(&cd.seq);
raw_local_irq_restore(flags);
update_clock_read_data(&rd);
}
/*
 * Timer callback: refresh the epoch through update_sched_clock(), move @hrt's
 * expiry forward by cd.wrap_kt and ask for the timer to be restarted.
 */
static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
{
update_sched_clock();
hrtimer_forward_now(hrt, cd.wrap_kt);
return HRTIMER_RESTART;
}
void __init sched_clock_register(u64 (*read)(void), int bits,
unsigned long rate)
void __init
sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
{
u64 res, wrap, new_mask, new_epoch, cyc, ns;
u32 new_mult, new_shift;
ktime_t new_wrap_kt;
unsigned long r;
char r_unit;
struct clock_read_data rd;
if (cd.rate > rate)
return;
WARN_ON(!irqs_disabled());
/* calculate the mult/shift to convert counter ticks to ns. */
/* Calculate the mult/shift to convert counter ticks to ns. */
clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);
new_mask = CLOCKSOURCE_MASK(bits);
cd.rate = rate;
/* Calculate how many nanosecs until we risk wrapping */
wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
cd.wrap_kt = ns_to_ktime(wrap);
/* calculate how many ns until we wrap */
wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
rd = cd.read_data[0];
/* update epoch for new counter and update epoch_ns from old counter*/
/* Update epoch for new counter and update 'epoch_ns' from old counter*/
new_epoch = read();
cyc = read_sched_clock();
ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
cd.mult, cd.shift);
cyc = cd.actual_read_sched_clock();
ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
cd.actual_read_sched_clock = read;
raw_write_seqcount_begin(&cd.seq);
read_sched_clock = read;
sched_clock_mask = new_mask;
cd.rate = rate;
cd.wrap_kt = new_wrap_kt;
cd.mult = new_mult;
cd.shift = new_shift;
cd.epoch_cyc = new_epoch;
cd.epoch_ns = ns;
raw_write_seqcount_end(&cd.seq);
rd.read_sched_clock = read;
rd.sched_clock_mask = new_mask;
rd.mult = new_mult;
rd.shift = new_shift;
rd.epoch_cyc = new_epoch;
rd.epoch_ns = ns;
update_clock_read_data(&rd);
r = rate;
if (r >= 4000000) {
r /= 1000000;
r_unit = 'M';
} else if (r >= 1000) {
r /= 1000;
r_unit = 'k';
} else
r_unit = ' ';
/* calculate the ns resolution of this counter */
} else {
if (r >= 1000) {
r /= 1000;
r_unit = 'k';
} else {
r_unit = ' ';
}
}
/* Calculate the ns resolution of this counter */
res = cyc_to_ns(1ULL, new_mult, new_shift);
pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
bits, r, r_unit, res, wrap);
/* Enable IRQ time accounting if we have a fast enough sched_clock */
/* Enable IRQ time accounting if we have a fast enough sched_clock() */
if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
enable_sched_clock_irqtime();
......@@ -172,10 +234,10 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
void __init sched_clock_postinit(void)
{
/*
* If no sched_clock function has been provided at that point,
* If no sched_clock() function has been provided at that point,
* make it the final one.
*/
if (read_sched_clock == jiffy_sched_clock_read)
if (cd.actual_read_sched_clock == jiffy_sched_clock_read)
sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
update_sched_clock();
......@@ -189,29 +251,53 @@ void __init sched_clock_postinit(void)
hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
}
/*
* Clock read function for use when the clock is suspended.
*
* This function makes it appear to sched_clock() as if the clock
* stopped counting at its last update.
*
* This function must only be called from the critical
* section in sched_clock(). It relies on the read_seqcount_retry()
* at the end of the critical section to be sure we observe the
* correct copy of 'epoch_cyc'.
*/
static u64 notrace suspended_sched_clock_read(void)
{
	/* the low bit of the sequence count names the copy to read from */
	const unsigned long idx = raw_read_seqcount(&cd.seq) & 1;
	const struct clock_read_data *rd = &cd.read_data[idx];

	/* hand back the epoch itself, so the computed cycle delta is zero */
	return rd->epoch_cyc;
}
/*
 * Syscore suspend hook: take a final epoch update, stop the wrap timer and
 * redirect the even copy's reader to suspended_sched_clock_read(), which
 * makes the clock appear stopped at that last update.
 *
 * Always returns 0.
 */
static int sched_clock_suspend(void)
{
	struct clock_read_data *rd = &cd.read_data[0];

	update_sched_clock();
	hrtimer_cancel(&sched_clock_timer);
	rd->read_sched_clock = suspended_sched_clock_read;

	return 0;
}
static void sched_clock_resume(void)
{
cd.epoch_cyc = read_sched_clock();
struct clock_read_data *rd = &cd.read_data[0];
rd->epoch_cyc = cd.actual_read_sched_clock();
hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
cd.suspended = false;
rd->read_sched_clock = cd.actual_read_sched_clock;
}
/* Suspend/resume hooks; registered by sched_clock_syscore_init(). */
static struct syscore_ops sched_clock_ops = {
	.suspend	= sched_clock_suspend,
	.resume		= sched_clock_resume,
};
/* Register the sched_clock suspend/resume hooks with syscore; always returns 0. */
static int __init sched_clock_syscore_init(void)
{
register_syscore_ops(&sched_clock_ops);
return 0;
}
device_initcall(sched_clock_syscore_init);
......@@ -59,6 +59,7 @@ struct tk_fast {
};
static struct tk_fast tk_fast_mono ____cacheline_aligned;
static struct tk_fast tk_fast_raw ____cacheline_aligned;
/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;
......@@ -68,8 +69,8 @@ bool __read_mostly persistent_clock_exist = false;
/*
 * Carry whole seconds out of the shifted-nanosecond accumulator
 * tkr_mono.xtime_nsec into xtime_sec, until less than one second's worth
 * of (shifted) nanoseconds remains.
 */
static inline void tk_normalize_xtime(struct timekeeper *tk)
{
	while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
		tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
		tk->xtime_sec++;
	}
}
......@@ -79,20 +80,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk)
struct timespec64 ts;
ts.tv_sec = tk->xtime_sec;
ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
return ts;
}
/*
 * Set the timekeeper's wall time from @ts: seconds are stored as-is, the
 * nanoseconds are stored left-shifted by the clock shift.
 */
static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
{
	tk->xtime_sec = ts->tv_sec;
	tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
}
/*
 * Add @ts to the timekeeper's wall time, then normalize so that the
 * shifted-nanosecond part holds less than one second again.
 */
static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
{
	tk->xtime_sec += ts->tv_sec;
	tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
	tk_normalize_xtime(tk);
}
......@@ -118,6 +119,117 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
tk->offs_boot = ktime_add(tk->offs_boot, delta);
}
#ifdef CONFIG_DEBUG_TIMEKEEPING
#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
/*
* These simple flag variables are managed
* without locks, which is racy, but ok since
* we don't really care about being super
* precise about how many events were seen,
* just that a problem was observed.
*/
static int timekeeping_underflow_seen;
static int timekeeping_overflow_seen;
/* last_warning is only modified under the timekeeping lock */
static long timekeeping_last_warning;
/*
 * Sanity-check one timekeeping update (CONFIG_DEBUG_TIMEKEEPING only).
 *
 * @tk:     timekeeper whose tkr_mono clock supplies max_cycles and the name
 * @offset: cycle offset about to be accumulated
 *
 * Complains when @offset exceeds the clock's max_cycles, or its 50% safety
 * margin, and reports any underflow/overflow flagged by
 * timekeeping_get_delta() since the previous call. The underflow/overflow
 * reports are rate-limited to one per WARNING_FREQ jiffies; the flags are
 * cleared whether or not a report was printed.
 */
static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
{
	cycle_t max_cycles = tk->tkr_mono.clock->max_cycles;
	const char *name = tk->tkr_mono.clock->name;

	if (offset > max_cycles) {
		printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
				offset, name, max_cycles);
		printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
	} else if (offset > (max_cycles >> 1)) {
		printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
				offset, name, max_cycles >> 1);
		printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
	}

	if (timekeeping_underflow_seen) {
		if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
			printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
			printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
			printk_deferred(" Your kernel is probably still fine.\n");
			timekeeping_last_warning = jiffies;
		}
		timekeeping_underflow_seen = 0;
	}

	if (timekeeping_overflow_seen) {
		if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
			printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
			printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
			printk_deferred(" Your kernel is probably still fine.\n");
			timekeeping_last_warning = jiffies;
		}
		timekeeping_overflow_seen = 0;
	}
}
/*
 * Read the clocksource and return the cycle delta since @tkr->cycle_last,
 * with debug sanity checks (CONFIG_DEBUG_TIMEKEEPING only).
 *
 * A delta that looks like a small negative value (relative to the mask) is
 * flagged as an underflow and replaced by 0; a delta above the clock's
 * max_cycles is flagged as an overflow and capped. The flags are reported
 * later by timekeeping_check_update().
 */
static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
{
	cycle_t now, last, mask, max, delta;
	unsigned int seq;

	/*
	 * Since we're called holding a seqlock, the data may shift
	 * under us while we're doing the calculation. This can cause
	 * false positives, since we'd note a problem but throw the
	 * results away. So nest another seqlock here to atomically
	 * grab the points we are checking with.
	 */
	do {
		seq = read_seqcount_begin(&tk_core.seq);
		now = tkr->read(tkr->clock);
		last = tkr->cycle_last;
		mask = tkr->mask;
		max = tkr->clock->max_cycles;
	} while (read_seqcount_retry(&tk_core.seq, seq));

	delta = clocksource_delta(now, last, mask);

	/*
	 * Try to catch underflows by checking if we are seeing small
	 * mask-relative negative values.
	 */
	if (unlikely((~delta & mask) < (mask >> 3))) {
		timekeeping_underflow_seen = 1;
		delta = 0;
	}

	/*
	 * Cap delta value to the max_cycles values to avoid mult overflows.
	 * Cap to the snapshot taken inside the seqcount section, so the limit
	 * applied is the very value the delta was compared against.
	 */
	if (unlikely(delta > max)) {
		timekeeping_overflow_seen = 1;
		delta = max;
	}

	return delta;
}
#else
/* Variant built when CONFIG_DEBUG_TIMEKEEPING is not defined: performs no checks. */
static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
{
}
/*
 * Unchecked variant: sample the clocksource once and return the masked
 * distance from the cycle value recorded at the last update_wall_time.
 */
static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
{
	cycle_t now = tkr->read(tkr->clock);

	return clocksource_delta(now, tkr->cycle_last, tkr->mask);
}
#endif
/**
* tk_setup_internals - Set up internals to use clocksource clock.
*
......@@ -135,11 +247,16 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
u64 tmp, ntpinterval;
struct clocksource *old_clock;
old_clock = tk->tkr.clock;
tk->tkr.clock = clock;
tk->tkr.read = clock->read;
tk->tkr.mask = clock->mask;
tk->tkr.cycle_last = tk->tkr.read(clock);
old_clock = tk->tkr_mono.clock;
tk->tkr_mono.clock = clock;
tk->tkr_mono.read = clock->read;
tk->tkr_mono.mask = clock->mask;
tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
tk->tkr_raw.clock = clock;
tk->tkr_raw.read = clock->read;
tk->tkr_raw.mask = clock->mask;
tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
/* Do the ns -> cycle conversion first, using original mult */
tmp = NTP_INTERVAL_LENGTH;
......@@ -163,11 +280,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
if (old_clock) {
int shift_change = clock->shift - old_clock->shift;
if (shift_change < 0)
tk->tkr.xtime_nsec >>= -shift_change;
tk->tkr_mono.xtime_nsec >>= -shift_change;
else
tk->tkr.xtime_nsec <<= shift_change;
tk->tkr_mono.xtime_nsec <<= shift_change;
}
tk->tkr.shift = clock->shift;
tk->tkr_raw.xtime_nsec = 0;
tk->tkr_mono.shift = clock->shift;
tk->tkr_raw.shift = clock->shift;
tk->ntp_error = 0;
tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
......@@ -178,7 +298,8 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
* active clocksource. These value will be adjusted via NTP
* to counteract clock drifting.
*/
tk->tkr.mult = clock->mult;
tk->tkr_mono.mult = clock->mult;
tk->tkr_raw.mult = clock->mult;
tk->ntp_err_mult = 0;
}
......@@ -193,14 +314,10 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
{
cycle_t cycle_now, delta;
cycle_t delta;
s64 nsec;
/* read clocksource: */
cycle_now = tkr->read(tkr->clock);
/* calculate the delta since the last update_wall_time: */
delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
delta = timekeeping_get_delta(tkr);
nsec = delta * tkr->mult + tkr->xtime_nsec;
nsec >>= tkr->shift;
......@@ -209,25 +326,6 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
return nsec + arch_gettimeoffset();
}
/*
 * Nanoseconds elapsed since tk->tkr.cycle_last, converted with the
 * clocksource's own clock->mult/clock->shift (not the timekeeper's copy of
 * mult), plus arch_gettimeoffset().
 *
 * NOTE(review): this still dereferences tk->tkr, while the surrounding code
 * uses tk->tkr_mono/tk->tkr_raw, and every call site in view is paired with a
 * timekeeping_get_ns(&tk->tkr_raw) replacement. Presumably this is the
 * pre-patch helper that the change removes -- confirm before relying on it.
 */
static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
{
struct clocksource *clock = tk->tkr.clock;
cycle_t cycle_now, delta;
s64 nsec;
/* read clocksource: */
cycle_now = tk->tkr.read(clock);
/* calculate the delta since the last update_wall_time: */
delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
/* convert delta to nanoseconds. */
nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
/* If arch requires, add in get_arch_timeoffset() */
return nsec + arch_gettimeoffset();
}
/**
* update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
* @tkr: Timekeeping readout base from which we take the update
......@@ -267,18 +365,18 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
* slightly wrong timestamp (a few nanoseconds). See
* @ktime_get_mono_fast_ns.
*/
static void update_fast_timekeeper(struct tk_read_base *tkr)
static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf)
{
struct tk_read_base *base = tk_fast_mono.base;
struct tk_read_base *base = tkf->base;
/* Force readers off to base[1] */
raw_write_seqcount_latch(&tk_fast_mono.seq);
raw_write_seqcount_latch(&tkf->seq);
/* Update base[0] */
memcpy(base, tkr, sizeof(*base));
/* Force readers back to base[0] */
raw_write_seqcount_latch(&tk_fast_mono.seq);
raw_write_seqcount_latch(&tkf->seq);
/* Update base[1] */
memcpy(base + 1, base, sizeof(*base));
......@@ -316,22 +414,33 @@ static void update_fast_timekeeper(struct tk_read_base *tkr)
* of the following timestamps. Callers need to be aware of that and
* deal with it.
*/
u64 notrace ktime_get_mono_fast_ns(void)
static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
{
struct tk_read_base *tkr;
unsigned int seq;
u64 now;
do {
seq = raw_read_seqcount(&tk_fast_mono.seq);
tkr = tk_fast_mono.base + (seq & 0x01);
now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr);
seq = raw_read_seqcount(&tkf->seq);
tkr = tkf->base + (seq & 0x01);
now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
} while (read_seqcount_retry(&tkf->seq, seq));
} while (read_seqcount_retry(&tk_fast_mono.seq, seq));
return now;
}
/* Fast readout of the monotonic clock in ns, taken from the tk_fast_mono copies. */
u64 ktime_get_mono_fast_ns(void)
{
return __ktime_get_fast_ns(&tk_fast_mono);
}
EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
/* Fast readout of the raw monotonic clock in ns, taken from the tk_fast_raw copies. */
u64 ktime_get_raw_fast_ns(void)
{
return __ktime_get_fast_ns(&tk_fast_raw);
}
EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
/* Suspend-time cycles value for halted fast timekeeper. */
static cycle_t cycles_at_suspend;
......@@ -353,12 +462,17 @@ static cycle_t dummy_clock_read(struct clocksource *cs)
static void halt_fast_timekeeper(struct timekeeper *tk)
{
	static struct tk_read_base tkr_dummy;
	struct tk_read_base *tkr = &tk->tkr_mono;

	/*
	 * Publish a copy of the monotonic readout base whose read hook is
	 * dummy_clock_read, after recording the current counter value in
	 * cycles_at_suspend.
	 */
	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
	cycles_at_suspend = tkr->read(tkr->clock);
	tkr_dummy.read = dummy_clock_read;
	update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);

	/* Same for the raw fast timekeeper, reusing the scratch copy. */
	tkr = &tk->tkr_raw;
	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
	tkr_dummy.read = dummy_clock_read;
	update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
}
#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
......@@ -369,8 +483,8 @@ static inline void update_vsyscall(struct timekeeper *tk)
xt = timespec64_to_timespec(tk_xtime(tk));
wm = timespec64_to_timespec(tk->wall_to_monotonic);
update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult,
tk->tkr.cycle_last);
update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult,
tk->tkr_mono.cycle_last);
}
static inline void old_vsyscall_fixup(struct timekeeper *tk)
......@@ -387,11 +501,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
* (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
* users are removed, this can be killed.
*/
remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1);
tk->tkr.xtime_nsec -= remainder;
tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift;
remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1);
tk->tkr_mono.xtime_nsec -= remainder;
tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift;
tk->ntp_error += remainder << tk->ntp_error_shift;
tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift;
tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift;
}
#else
#define old_vsyscall_fixup(tk)
......@@ -456,17 +570,17 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
*/
seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
nsec = (u32) tk->wall_to_monotonic.tv_nsec;
tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
/* Update the monotonic raw base */
tk->base_raw = timespec64_to_ktime(tk->raw_time);
tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
/*
* The sum of the nanoseconds portions of xtime and
* wall_to_monotonic can be greater/equal one second. Take
* this into account before updating tk->ktime_sec.
*/
nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift);
nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
if (nsec >= NSEC_PER_SEC)
seconds++;
tk->ktime_sec = seconds;
......@@ -489,7 +603,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
memcpy(&shadow_timekeeper, &tk_core.timekeeper,
sizeof(tk_core.timekeeper));
update_fast_timekeeper(&tk->tkr);
update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
}
/**
......@@ -501,22 +616,23 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
*/
static void timekeeping_forward_now(struct timekeeper *tk)
{
	struct clocksource *clock = tk->tkr_mono.clock;
	cycle_t cycle_now, delta;
	s64 nsec;

	cycle_now = tk->tkr_mono.read(clock);
	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);

	/* both readout bases share the same cycle reference point */
	tk->tkr_mono.cycle_last = cycle_now;
	tk->tkr_raw.cycle_last  = cycle_now;

	tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;

	/* If arch requires, add in get_arch_timeoffset() */
	tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;

	tk_normalize_xtime(tk);

	/* raw time is advanced with the raw base's mult/shift */
	nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
	timespec64_add_ns(&tk->raw_time, nsec);
}
......@@ -537,7 +653,7 @@ int __getnstimeofday64(struct timespec64 *ts)
seq = read_seqcount_begin(&tk_core.seq);
ts->tv_sec = tk->xtime_sec;
nsecs = timekeeping_get_ns(&tk->tkr);
nsecs = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq));
......@@ -577,8 +693,8 @@ ktime_t ktime_get(void)
do {
seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr.base_mono;
nsecs = timekeeping_get_ns(&tk->tkr);
base = tk->tkr_mono.base;
nsecs = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq));
......@@ -603,8 +719,8 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs)
do {
seq = read_seqcount_begin(&tk_core.seq);
base = ktime_add(tk->tkr.base_mono, *offset);
nsecs = timekeeping_get_ns(&tk->tkr);
base = ktime_add(tk->tkr_mono.base, *offset);
nsecs = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq));
......@@ -645,8 +761,8 @@ ktime_t ktime_get_raw(void)
do {
seq = read_seqcount_begin(&tk_core.seq);
base = tk->base_raw;
nsecs = timekeeping_get_ns_raw(tk);
base = tk->tkr_raw.base;
nsecs = timekeeping_get_ns(&tk->tkr_raw);
} while (read_seqcount_retry(&tk_core.seq, seq));
......@@ -674,7 +790,7 @@ void ktime_get_ts64(struct timespec64 *ts)
do {
seq = read_seqcount_begin(&tk_core.seq);
ts->tv_sec = tk->xtime_sec;
nsec = timekeeping_get_ns(&tk->tkr);
nsec = timekeeping_get_ns(&tk->tkr_mono);
tomono = tk->wall_to_monotonic;
} while (read_seqcount_retry(&tk_core.seq, seq));
......@@ -759,8 +875,8 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
ts_real->tv_sec = tk->xtime_sec;
ts_real->tv_nsec = 0;
nsecs_raw = timekeeping_get_ns_raw(tk);
nsecs_real = timekeeping_get_ns(&tk->tkr);
nsecs_raw = timekeeping_get_ns(&tk->tkr_raw);
nsecs_real = timekeeping_get_ns(&tk->tkr_mono);
} while (read_seqcount_retry(&tk_core.seq, seq));
......@@ -943,7 +1059,7 @@ static int change_clocksource(void *data)
*/
if (try_module_get(new->owner)) {
if (!new->enable || new->enable(new) == 0) {
old = tk->tkr.clock;
old = tk->tkr_mono.clock;
tk_setup_internals(tk, new);
if (old->disable)
old->disable(old);
......@@ -971,11 +1087,11 @@ int timekeeping_notify(struct clocksource *clock)
{
struct timekeeper *tk = &tk_core.timekeeper;
if (tk->tkr.clock == clock)
if (tk->tkr_mono.clock == clock)
return 0;
stop_machine(change_clocksource, clock, NULL);
tick_clock_notify();
return tk->tkr.clock == clock ? 0 : -1;
return tk->tkr_mono.clock == clock ? 0 : -1;
}
/**
......@@ -993,7 +1109,7 @@ void getrawmonotonic64(struct timespec64 *ts)
do {
seq = read_seqcount_begin(&tk_core.seq);
nsecs = timekeeping_get_ns_raw(tk);
nsecs = timekeeping_get_ns(&tk->tkr_raw);
ts64 = tk->raw_time;
} while (read_seqcount_retry(&tk_core.seq, seq));
......@@ -1016,7 +1132,7 @@ int timekeeping_valid_for_hres(void)
do {
seq = read_seqcount_begin(&tk_core.seq);
ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
} while (read_seqcount_retry(&tk_core.seq, seq));
......@@ -1035,7 +1151,7 @@ u64 timekeeping_max_deferment(void)
do {
seq = read_seqcount_begin(&tk_core.seq);
ret = tk->tkr.clock->max_idle_ns;
ret = tk->tkr_mono.clock->max_idle_ns;
} while (read_seqcount_retry(&tk_core.seq, seq));
......@@ -1114,7 +1230,6 @@ void __init timekeeping_init(void)
tk_set_xtime(tk, &now);
tk->raw_time.tv_sec = 0;
tk->raw_time.tv_nsec = 0;
tk->base_raw.tv64 = 0;
if (boot.tv_sec == 0 && boot.tv_nsec == 0)
boot = tk_xtime(tk);
......@@ -1200,7 +1315,7 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
void timekeeping_resume(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
struct clocksource *clock = tk->tkr.clock;
struct clocksource *clock = tk->tkr_mono.clock;
unsigned long flags;
struct timespec64 ts_new, ts_delta;
struct timespec tmp;
......@@ -1228,16 +1343,16 @@ void timekeeping_resume(void)
* The less preferred source will only be tried if there is no better
* usable source. The rtc part is handled separately in rtc core code.
*/
cycle_now = tk->tkr.read(clock);
cycle_now = tk->tkr_mono.read(clock);
if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
cycle_now > tk->tkr.cycle_last) {
cycle_now > tk->tkr_mono.cycle_last) {
u64 num, max = ULLONG_MAX;
u32 mult = clock->mult;
u32 shift = clock->shift;
s64 nsec = 0;
cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last,
tk->tkr.mask);
cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
tk->tkr_mono.mask);
/*
* "cycle_delta * mutl" may cause 64 bits overflow, if the
......@@ -1263,7 +1378,9 @@ void timekeeping_resume(void)
__timekeeping_inject_sleeptime(tk, &ts_delta);
/* Re-base the last cycle value */
tk->tkr.cycle_last = cycle_now;
tk->tkr_mono.cycle_last = cycle_now;
tk->tkr_raw.cycle_last = cycle_now;
tk->ntp_error = 0;
timekeeping_suspended = 0;
timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
......@@ -1416,15 +1533,15 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
*
* XXX - TODO: Doc ntp_error calculation.
*/
if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) {
if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
/* NTP adjustment caused clocksource mult overflow */
WARN_ON_ONCE(1);
return;
}
tk->tkr.mult += mult_adj;
tk->tkr_mono.mult += mult_adj;
tk->xtime_interval += interval;
tk->tkr.xtime_nsec -= offset;
tk->tkr_mono.xtime_nsec -= offset;
tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
}
......@@ -1486,13 +1603,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
tk->ntp_err_mult = 0;
}
if (unlikely(tk->tkr.clock->maxadj &&
(abs(tk->tkr.mult - tk->tkr.clock->mult)
> tk->tkr.clock->maxadj))) {
if (unlikely(tk->tkr_mono.clock->maxadj &&
(abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
> tk->tkr_mono.clock->maxadj))) {
printk_once(KERN_WARNING
"Adjusting %s more than 11%% (%ld vs %ld)\n",
tk->tkr.clock->name, (long)tk->tkr.mult,
(long)tk->tkr.clock->mult + tk->tkr.clock->maxadj);
tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
(long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
}
/*
......@@ -1509,9 +1626,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
* We'll correct this error next time through this function, when
* xtime_nsec is not as small.
*/
if (unlikely((s64)tk->tkr.xtime_nsec < 0)) {
s64 neg = -(s64)tk->tkr.xtime_nsec;
tk->tkr.xtime_nsec = 0;
if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
s64 neg = -(s64)tk->tkr_mono.xtime_nsec;
tk->tkr_mono.xtime_nsec = 0;
tk->ntp_error += neg << tk->ntp_error_shift;
}
}
......@@ -1526,13 +1643,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
*/
static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
{
u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift;
u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
unsigned int clock_set = 0;
while (tk->tkr.xtime_nsec >= nsecps) {
while (tk->tkr_mono.xtime_nsec >= nsecps) {
int leap;
tk->tkr.xtime_nsec -= nsecps;
tk->tkr_mono.xtime_nsec -= nsecps;
tk->xtime_sec++;
/* Figure out if its a leap sec and apply if needed */
......@@ -1577,9 +1694,10 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
/* Accumulate one shifted interval */
offset -= interval;
tk->tkr.cycle_last += interval;
tk->tkr_mono.cycle_last += interval;
tk->tkr_raw.cycle_last += interval;
tk->tkr.xtime_nsec += tk->xtime_interval << shift;
tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
*clock_set |= accumulate_nsecs_to_secs(tk);
/* Accumulate raw time */
......@@ -1622,14 +1740,17 @@ void update_wall_time(void)
#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
offset = real_tk->cycle_interval;
#else
offset = clocksource_delta(tk->tkr.read(tk->tkr.clock),
tk->tkr.cycle_last, tk->tkr.mask);
offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
#endif
/* Check if there's really nothing to do */
if (offset < real_tk->cycle_interval)
goto out;
/* Do some additional sanity checking */
timekeeping_check_update(real_tk, offset);
/*
* With NO_HZ we may have to accumulate many cycle_intervals
* (think "ticks") worth of time at once. To do this efficiently,
......@@ -1784,8 +1905,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot,
do {
seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr.base_mono;
nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift;
base = tk->tkr_mono.base;
nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
*offs_real = tk->offs_real;
*offs_boot = tk->offs_boot;
......@@ -1816,8 +1937,8 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot,
do {
seq = read_seqcount_begin(&tk_core.seq);
base = tk->tkr.base_mono;
nsecs = timekeeping_get_ns(&tk->tkr);
base = tk->tkr_mono.base;
nsecs = timekeeping_get_ns(&tk->tkr_mono);
*offs_real = tk->offs_real;
*offs_boot = tk->offs_boot;
......
......@@ -228,9 +228,35 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
print_name_offset(m, dev->set_next_event);
SEQ_printf(m, "\n");
SEQ_printf(m, " set_mode: ");
print_name_offset(m, dev->set_mode);
SEQ_printf(m, "\n");
if (dev->set_mode) {
SEQ_printf(m, " set_mode: ");
print_name_offset(m, dev->set_mode);
SEQ_printf(m, "\n");
} else {
if (dev->set_mode_shutdown) {
SEQ_printf(m, " shutdown: ");
print_name_offset(m, dev->set_mode_shutdown);
SEQ_printf(m, "\n");
}
if (dev->set_mode_periodic) {
SEQ_printf(m, " periodic: ");
print_name_offset(m, dev->set_mode_periodic);
SEQ_printf(m, "\n");
}
if (dev->set_mode_oneshot) {
SEQ_printf(m, " oneshot: ");
print_name_offset(m, dev->set_mode_oneshot);
SEQ_printf(m, "\n");
}
if (dev->set_mode_resume) {
SEQ_printf(m, " resume: ");
print_name_offset(m, dev->set_mode_resume);
SEQ_printf(m, "\n");
}
}
SEQ_printf(m, " event_handler: ");
print_name_offset(m, dev->event_handler);
......
......@@ -865,6 +865,19 @@ config SCHED_STACK_END_CHECK
data corruption or a sporadic crash at a later stage once the region
is examined. The runtime overhead introduced is minimal.
config DEBUG_TIMEKEEPING
bool "Enable extra timekeeping sanity checking"
help
This option will enable additional timekeeping sanity checks
which may be helpful when diagnosing issues where timekeeping
problems are suspected.
This may include checks in the timekeeping hotpaths, so this
option may have a (very small) performance impact to some
workloads.
If unsure, say N.
config TIMER_STATS
bool "Collect kernel timers statistics"
depends on DEBUG_KERNEL && PROC_FS
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment