Commit 13e091b6 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'x86-timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 timer updates from Thomas Gleixner:
 "Early TSC based time stamping to allow better boot time analysis.

  This comes with a general cleanup of the TSC calibration code which
  grew warts and duct taping over the years and removes 250 lines of
  code. Initiated and mostly implemented by Pavel with help from various
  folks"

* 'x86-timers-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (37 commits)
  x86/kvmclock: Mark kvm_get_preset_lpj() as __init
  x86/tsc: Consolidate init code
  sched/clock: Disable interrupts when calling generic_sched_clock_init()
  timekeeping: Prevent false warning when persistent clock is not available
  sched/clock: Close a hole in sched_clock_init()
  x86/tsc: Make use of tsc_calibrate_cpu_early()
  x86/tsc: Split native_calibrate_cpu() into early and late parts
  sched/clock: Use static key for sched_clock_running
  sched/clock: Enable sched clock early
  sched/clock: Move sched clock initialization and merge with generic clock
  x86/tsc: Use TSC as sched clock early
  x86/tsc: Initialize cyc2ns when tsc frequency is determined
  x86/tsc: Calibrate tsc only once
  ARM/time: Remove read_boot_clock64()
  s390/time: Remove read_boot_clock64()
  timekeeping: Default boot time offset to local_clock()
  timekeeping: Replace read_boot_clock64() with read_persistent_wall_and_boot_offset()
  s390/time: Add read_persistent_wall_and_boot_offset()
  x86/xen/time: Output xen sched_clock time from 0
  x86/xen/time: Initialize pv xen time in init_hypervisor_platform()
  ...
parents eac34119 1088c6ee
......@@ -2835,8 +2835,6 @@
nosync [HW,M68K] Disables sync negotiation for all devices.
notsc [BUGS=X86-32] Disable Time Stamp Counter
nowatchdog [KNL] Disable both lockup detectors, i.e.
soft-lockup and NMI watchdog (hard-lockup).
......
......@@ -92,9 +92,7 @@ APICs
Timing
notsc
Don't use the CPU time stamp counter to read the wall time.
This can be used to work around timing problems on multiprocessor systems
with not properly synchronized CPUs.
Deprecated, use tsc=unstable instead.
nohpet
Don't use the HPET timer.
......
......@@ -13,7 +13,6 @@
extern void timer_tick(void);
typedef void (*clock_access_fn)(struct timespec64 *);
extern int register_persistent_clock(clock_access_fn read_boot,
clock_access_fn read_persistent);
extern int register_persistent_clock(clock_access_fn read_persistent);
#endif
......@@ -83,29 +83,18 @@ static void dummy_clock_access(struct timespec64 *ts)
}
static clock_access_fn __read_persistent_clock = dummy_clock_access;
static clock_access_fn __read_boot_clock = dummy_clock_access;
void read_persistent_clock64(struct timespec64 *ts)
{
__read_persistent_clock(ts);
}
void read_boot_clock64(struct timespec64 *ts)
{
__read_boot_clock(ts);
}
int __init register_persistent_clock(clock_access_fn read_boot,
clock_access_fn read_persistent)
int __init register_persistent_clock(clock_access_fn read_persistent)
{
/* Only allow the clockaccess functions to be registered once */
if (__read_persistent_clock == dummy_clock_access &&
__read_boot_clock == dummy_clock_access) {
if (read_boot)
__read_boot_clock = read_boot;
if (__read_persistent_clock == dummy_clock_access) {
if (read_persistent)
__read_persistent_clock = read_persistent;
return 0;
}
......
......@@ -110,7 +110,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
}
sched_clock_register(omap_32k_read_sched_clock, 32, 32768);
register_persistent_clock(NULL, omap_read_persistent_clock64);
register_persistent_clock(omap_read_persistent_clock64);
pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n");
return 0;
......
......@@ -221,17 +221,22 @@ void read_persistent_clock64(struct timespec64 *ts)
ext_to_timespec64(clk, ts);
}
void read_boot_clock64(struct timespec64 *ts)
void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
struct timespec64 *boot_offset)
{
unsigned char clk[STORE_CLOCK_EXT_SIZE];
struct timespec64 boot_time;
__u64 delta;
delta = initial_leap_seconds + TOD_UNIX_EPOCH;
memcpy(clk, tod_clock_base, 16);
*(__u64 *) &clk[1] -= delta;
if (*(__u64 *) &clk[1] > delta)
memcpy(clk, tod_clock_base, STORE_CLOCK_EXT_SIZE);
*(__u64 *)&clk[1] -= delta;
if (*(__u64 *)&clk[1] > delta)
clk[0]--;
ext_to_timespec64(clk, ts);
ext_to_timespec64(clk, &boot_time);
read_persistent_clock64(wall_time);
*boot_offset = timespec64_sub(*wall_time, boot_time);
}
static u64 read_tod_clock(struct clocksource *cs)
......
......@@ -76,4 +76,17 @@
#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
/* Useful macros */
/*
 * INTEL_CPU_FAM_ANY - expand to a designated-initializer entry describing
 * an Intel CPU.
 * @_family:      value stored in .family
 * @_model:       value stored in .model
 * @_driver_data: object whose ADDRESS is stored in .driver_data (cast to
 *                kernel_ulong_t), so pass the object itself, not a pointer
 *
 * .vendor is fixed to X86_VENDOR_INTEL and .feature to X86_FEATURE_ANY.
 * NOTE(review): presumably meant for struct x86_cpu_id tables - confirm
 * against the users of this header.
 */
#define INTEL_CPU_FAM_ANY(_family, _model, _driver_data) \
{ \
.vendor = X86_VENDOR_INTEL, \
.family = _family, \
.model = _model, \
.feature = X86_FEATURE_ANY, \
.driver_data = (kernel_ulong_t)&_driver_data \
}
/*
 * INTEL_CPU_FAM6 - family 6 shorthand for INTEL_CPU_FAM_ANY.
 * @_model is pasted onto the INTEL_FAM6_ prefix, so pass only the suffix
 * of one of the INTEL_FAM6_* model constants.
 */
#define INTEL_CPU_FAM6(_model, _driver_data) \
INTEL_CPU_FAM_ANY(6, INTEL_FAM6_##_model, _driver_data)
#endif /* _ASM_X86_INTEL_FAMILY_H */
......@@ -80,35 +80,6 @@ enum intel_mid_cpu_type {
extern enum intel_mid_cpu_type __intel_mid_cpu_chip;
/**
* struct intel_mid_ops - Interface between intel-mid & sub archs
* @arch_setup: arch_setup function to re-initialize platform
* structures (x86_init, x86_platform_init)
*
* This structure can be extended if any new interface is required
* between intel-mid & its sub arch files.
*/
struct intel_mid_ops {
void (*arch_setup)(void);
};
/* Helper API's for INTEL_MID_OPS_INIT */
#define DECLARE_INTEL_MID_OPS_INIT(cpuname, cpuid) \
[cpuid] = get_##cpuname##_ops
/* Maximum number of CPU ops */
#define MAX_CPU_OPS(a) (sizeof(a)/sizeof(void *))
/*
* For every new cpu addition, a weak get_<cpuname>_ops() function needs be
* declared in arch/x86/platform/intel_mid/intel_mid_weak_decls.h.
*/
#define INTEL_MID_OPS_INIT { \
DECLARE_INTEL_MID_OPS_INIT(penwell, INTEL_MID_CPU_CHIP_PENWELL), \
DECLARE_INTEL_MID_OPS_INIT(cloverview, INTEL_MID_CPU_CHIP_CLOVERVIEW), \
DECLARE_INTEL_MID_OPS_INIT(tangier, INTEL_MID_CPU_CHIP_TANGIER) \
};
#ifdef CONFIG_X86_INTEL_MID
static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void)
......@@ -136,20 +107,6 @@ enum intel_mid_timer_options {
extern enum intel_mid_timer_options intel_mid_timer_options;
/*
* Penwell uses spread spectrum clock, so the freq number is not exactly
* the same as reported by MSR based on SDM.
*/
#define FSB_FREQ_83SKU 83200
#define FSB_FREQ_100SKU 99840
#define FSB_FREQ_133SKU 133000
#define FSB_FREQ_167SKU 167000
#define FSB_FREQ_200SKU 200000
#define FSB_FREQ_267SKU 267000
#define FSB_FREQ_333SKU 333000
#define FSB_FREQ_400SKU 400000
/* Bus Select SoC Fuse value */
#define BSEL_SOC_FUSE_MASK 0x7
/* FSB 133MHz */
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_KVM_GUEST_H
#define _ASM_X86_KVM_GUEST_H
int kvm_setup_vsyscall_timeinfo(void);
#endif /* _ASM_X86_KVM_GUEST_H */
......@@ -7,7 +7,6 @@
#include <uapi/asm/kvm_para.h>
extern void kvmclock_init(void);
extern int kvm_register_clock(char *txt);
#ifdef CONFIG_KVM_GUEST
bool kvm_check_and_clear_guest_paused(void);
......
......@@ -37,5 +37,6 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
extern int after_bootmem;
#endif /* _ASM_X86_TEXT_PATCHING_H */
......@@ -33,13 +33,13 @@ static inline cycles_t get_cycles(void)
extern struct system_counterval_t convert_art_to_tsc(u64 art);
extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns);
extern void tsc_early_delay_calibrate(void);
extern void tsc_early_init(void);
extern void tsc_init(void);
extern void mark_tsc_unstable(char *reason);
extern int unsynchronized_tsc(void);
extern int check_tsc_unstable(void);
extern void mark_tsc_async_resets(char *reason);
extern unsigned long native_calibrate_cpu(void);
extern unsigned long native_calibrate_cpu_early(void);
extern unsigned long native_calibrate_tsc(void);
extern unsigned long long native_sched_clock_from_tsc(u64 tsc);
......
......@@ -668,6 +668,7 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode,
local_irq_save(flags);
memcpy(addr, opcode, len);
local_irq_restore(flags);
sync_core();
/* Could also do a CLFLUSH here to speed up CPU recovery; but
that causes hangs on some VIA CPUs. */
return addr;
......@@ -693,6 +694,12 @@ void *text_poke(void *addr, const void *opcode, size_t len)
struct page *pages[2];
int i;
/*
* While boot memory allocator is running we cannot use struct
* pages as they are not yet initialized.
*/
BUG_ON(!after_bootmem);
if (!core_kernel_text((unsigned long)addr)) {
pages[0] = vmalloc_to_page(addr);
pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
......
......@@ -232,8 +232,6 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
}
}
set_cpu_cap(c, X86_FEATURE_K7);
/* calling is from identify_secondary_cpu() ? */
if (!c->cpu_index)
return;
......@@ -617,6 +615,14 @@ static void early_init_amd(struct cpuinfo_x86 *c)
early_init_amd_mc(c);
#ifdef CONFIG_X86_32
if (c->x86 == 6)
set_cpu_cap(c, X86_FEATURE_K7);
#endif
if (c->x86 >= 0xf)
set_cpu_cap(c, X86_FEATURE_K8);
rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
/*
......@@ -863,9 +869,6 @@ static void init_amd(struct cpuinfo_x86 *c)
init_amd_cacheinfo(c);
if (c->x86 >= 0xf)
set_cpu_cap(c, X86_FEATURE_K8);
if (cpu_has(c, X86_FEATURE_XMM2)) {
unsigned long long val;
int ret;
......
......@@ -1018,6 +1018,24 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
}
/*
 * NOPL is nominally present on every CPU of family >= 6. In practice
 * early VIA chips lack it and, more importantly, some broken
 * virtualizers that are not easy to detect mishandle it. With the
 * latter the instruction does not even *fail* reliably, so probing for
 * it is pointless.
 *
 * 64-bit: force the capability on, so that non-constant inputs of
 *         cpu_has() see it as well.
 * 32-bit: clear it completely until there is a reliable way to detect
 *         all the broken cases.
 */
static void detect_nopl(void)
{
#ifndef CONFIG_X86_32
	setup_force_cpu_cap(X86_FEATURE_NOPL);
#else
	setup_clear_cpu_cap(X86_FEATURE_NOPL);
#endif
}
/*
* Do minimum CPU detection early.
* Fields really needed: vendor, cpuid_level, family, model, mask,
......@@ -1092,6 +1110,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
*/
if (!pgtable_l5_enabled())
setup_clear_cpu_cap(X86_FEATURE_LA57);
detect_nopl();
}
void __init early_cpu_init(void)
......@@ -1127,24 +1147,6 @@ void __init early_cpu_init(void)
early_identify_cpu(&boot_cpu_data);
}
/*
* The NOPL instruction is supposed to exist on all CPUs of family >= 6;
* unfortunately, that's not true in practice because of early VIA
* chips and (more importantly) broken virtualizers that are not easy
* to detect. In the latter case it doesn't even *fail* reliably, so
* probing for it doesn't even work. Disable it completely on 32-bit
* unless we can find a reliable way to detect all the broken cases.
* Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
*/
static void detect_nopl(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
clear_cpu_cap(c, X86_FEATURE_NOPL);
#else
set_cpu_cap(c, X86_FEATURE_NOPL);
#endif
}
static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
......@@ -1207,8 +1209,6 @@ static void generic_identify(struct cpuinfo_x86 *c)
get_model_name(c); /* Default name */
detect_nopl(c);
detect_null_seg_behavior(c);
/*
......
......@@ -37,15 +37,18 @@ static void bug_at(unsigned char *ip, int line)
BUG();
}
static void __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
void *(*poker)(void *, const void *, size_t),
int init)
static void __ref __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
void *(*poker)(void *, const void *, size_t),
int init)
{
union jump_code_union code;
const unsigned char default_nop[] = { STATIC_KEY_INIT_NOP };
const unsigned char *ideal_nop = ideal_nops[NOP_ATOMIC5];
if (early_boot_irqs_disabled)
poker = text_poke_early;
if (type == JUMP_LABEL_JMP) {
if (init) {
/*
......
......@@ -45,7 +45,6 @@
#include <asm/apic.h>
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
#include <asm/kvm_guest.h>
static int kvmapf = 1;
......@@ -66,15 +65,6 @@ static int __init parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
static int kvmclock_vsyscall = 1;
static int __init parse_no_kvmclock_vsyscall(char *arg)
{
kvmclock_vsyscall = 0;
return 0;
}
early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64);
static int has_steal_clock = 0;
......@@ -560,9 +550,6 @@ static void __init kvm_guest_init(void)
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
apic_set_eoi_write(kvm_guest_apic_eoi_write);
if (kvmclock_vsyscall)
kvm_setup_vsyscall_timeinfo();
#ifdef CONFIG_SMP
smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
......@@ -628,6 +615,7 @@ const __initconst struct hypervisor_x86 x86_hyper_kvm = {
.name = "KVM",
.detect = kvm_detect,
.type = X86_HYPER_KVM,
.init.init_platform = kvmclock_init,
.init.guest_late_init = kvm_guest_init,
.init.x2apic_available = kvm_para_available,
};
......
This diff is collapsed.
......@@ -866,6 +866,8 @@ void __init setup_arch(char **cmdline_p)
idt_setup_early_traps();
early_cpu_init();
arch_init_ideal_nops();
jump_label_init();
early_ioremap_init();
setup_olpc_ofw_pgd();
......@@ -1012,6 +1014,7 @@ void __init setup_arch(char **cmdline_p)
*/
init_hypervisor_platform();
tsc_early_init();
x86_init.resources.probe_roms();
/* after parse_early_param, so could debug it */
......@@ -1197,11 +1200,6 @@ void __init setup_arch(char **cmdline_p)
memblock_find_dma_reserve();
#ifdef CONFIG_KVM_GUEST
kvmclock_init();
#endif
tsc_early_delay_calibrate();
if (!early_xdbc_setup_hardware())
early_xdbc_register_console();
......@@ -1272,8 +1270,6 @@ void __init setup_arch(char **cmdline_p)
mcheck_init();
arch_init_ideal_nops();
register_refined_jiffies(CLOCK_TICK_RATE);
#ifdef CONFIG_EFI
......
......@@ -33,16 +33,13 @@ EXPORT_SYMBOL(cpu_khz);
unsigned int __read_mostly tsc_khz;
EXPORT_SYMBOL(tsc_khz);
#define KHZ 1000
/*
* TSC can be unstable due to cpufreq or due to unsynced TSCs
*/
static int __read_mostly tsc_unstable;
/* native_sched_clock() is called before tsc_init(), so
we must start with the TSC soft disabled to prevent
erroneous rdtsc usage on !boot_cpu_has(X86_FEATURE_TSC) processors */
static int __read_mostly tsc_disabled = -1;
static DEFINE_STATIC_KEY_FALSE(__use_tsc);
int tsc_clocksource_reliable;
......@@ -106,23 +103,6 @@ void cyc2ns_read_end(void)
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
static void cyc2ns_data_init(struct cyc2ns_data *data)
{
data->cyc2ns_mul = 0;
data->cyc2ns_shift = 0;
data->cyc2ns_offset = 0;
}
static void __init cyc2ns_init(int cpu)
{
struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu);
cyc2ns_data_init(&c2n->data[0]);
cyc2ns_data_init(&c2n->data[1]);
seqcount_init(&c2n->seq);
}
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
{
struct cyc2ns_data data;
......@@ -138,18 +118,11 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
return ns;
}
static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
static void __set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
{
unsigned long long ns_now;
struct cyc2ns_data data;
struct cyc2ns *c2n;
unsigned long flags;
local_irq_save(flags);
sched_clock_idle_sleep_event();
if (!khz)
goto done;
ns_now = cycles_2_ns(tsc_now);
......@@ -181,12 +154,55 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_
c2n->data[0] = data;
raw_write_seqcount_latch(&c2n->seq);
c2n->data[1] = data;
}
/*
 * Update the cycles-to-nanoseconds scale of @cpu.
 * @khz:     frequency handed to __set_cyc2ns_scale(); a value of 0 skips
 *           the scale update
 * @cpu:     CPU whose cyc2ns data is updated
 * @tsc_now: TSC value handed to __set_cyc2ns_scale()
 *
 * The update runs with local interrupts disabled and is bracketed by
 * sched_clock_idle_sleep_event() / sched_clock_idle_wakeup_event(); both
 * events are issued even when @khz is 0.
 */
static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_now)
{
	unsigned long flags;

	local_irq_save(flags);
	sched_clock_idle_sleep_event();

	if (khz)
		__set_cyc2ns_scale(khz, cpu, tsc_now);

	sched_clock_idle_wakeup_event();
	local_irq_restore(flags);
}
/*
 * Initialize cyc2ns for boot cpu
 *
 * Initializes the sequence counter of the calling CPU's cyc2ns instance
 * and then sets that CPU's scale from tsc_khz and the current rdtsc()
 * value. The counter is initialized first because __set_cyc2ns_scale()
 * is invoked on the same instance afterwards.
 */
static void __init cyc2ns_init_boot_cpu(void)
{
struct cyc2ns *c2n = this_cpu_ptr(&cyc2ns);
seqcount_init(&c2n->seq);
__set_cyc2ns_scale(tsc_khz, smp_processor_id(), rdtsc());
}
/*
 * Secondary CPUs do not run through tsc_init(), so set up
 * all the scale factors for all CPUs, assuming the same
 * speed as the bootup CPU. (cpufreq notifiers will fix this
 * up if their speed diverges)
 *
 * Every possible CPU other than the calling one gets its sequence
 * counter initialized and both cyc2ns data slots copied from the
 * calling CPU.
 */
static void __init cyc2ns_init_secondary_cpus(void)
{
	unsigned int cpu, this_cpu = smp_processor_id();
	struct cyc2ns_data *data = this_cpu_ptr(&cyc2ns)->data;
	struct cyc2ns *c2n;

	for_each_possible_cpu(cpu) {
		if (cpu == this_cpu)
			continue;

		/*
		 * Fetch the target CPU's instance before touching its
		 * seqcount. Initializing first would hit the previously
		 * visited instance (the calling CPU's on the first pass)
		 * and leave the last CPU's counter uninitialized.
		 */
		c2n = per_cpu_ptr(&cyc2ns, cpu);
		seqcount_init(&c2n->seq);
		c2n->data[0] = data[0];
		c2n->data[1] = data[1];
	}
}
/*
* Scheduler clock - returns current time in nanosec units.
*/
......@@ -248,8 +264,7 @@ EXPORT_SYMBOL_GPL(check_tsc_unstable);
#ifdef CONFIG_X86_TSC
int __init notsc_setup(char *str)
{
pr_warn("Kernel compiled with CONFIG_X86_TSC, cannot disable TSC completely\n");
tsc_disabled = 1;
mark_tsc_unstable("boot parameter notsc");
return 1;
}
#else
......@@ -665,30 +680,17 @@ static unsigned long cpu_khz_from_cpuid(void)
return eax_base_mhz * 1000;
}
/**
* native_calibrate_cpu - calibrate the cpu on boot
/*
* calibrate cpu using pit, hpet, and ptimer methods. They are available
* later in boot after acpi is initialized.
*/
unsigned long native_calibrate_cpu(void)
static unsigned long pit_hpet_ptimer_calibrate_cpu(void)
{
u64 tsc1, tsc2, delta, ref1, ref2;
unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
unsigned long flags, latch, ms, fast_calibrate;
unsigned long flags, latch, ms;
int hpet = is_hpet_enabled(), i, loopmin;
fast_calibrate = cpu_khz_from_cpuid();
if (fast_calibrate)
return fast_calibrate;
fast_calibrate = cpu_khz_from_msr();
if (fast_calibrate)
return fast_calibrate;
local_irq_save(flags);
fast_calibrate = quick_pit_calibrate();
local_irq_restore(flags);
if (fast_calibrate)
return fast_calibrate;
/*
* Run 5 calibration loops to get the lowest frequency value
* (the best estimate). We use two different calibration modes
......@@ -831,6 +833,37 @@ unsigned long native_calibrate_cpu(void)
return tsc_pit_min;
}
/**
 * native_calibrate_cpu_early - can calibrate the cpu early in boot
 *
 * Tries cpu_khz_from_cpuid(), then cpu_khz_from_msr(), and finally
 * quick_pit_calibrate() with local interrupts disabled.
 *
 * Return: the first non-zero result, or 0 if every method returned 0.
 */
unsigned long native_calibrate_cpu_early(void)
{
	unsigned long khz, irq_flags;

	khz = cpu_khz_from_cpuid();
	if (khz)
		return khz;

	khz = cpu_khz_from_msr();
	if (khz)
		return khz;

	local_irq_save(irq_flags);
	khz = quick_pit_calibrate();
	local_irq_restore(irq_flags);

	return khz;
}
/**
 * native_calibrate_cpu - calibrate the cpu
 *
 * Uses native_calibrate_cpu_early() and falls back to
 * pit_hpet_ptimer_calibrate_cpu() only when that returns 0.
 *
 * Return: the result of whichever method was used last.
 */
static unsigned long native_calibrate_cpu(void)
{
	unsigned long khz = native_calibrate_cpu_early();

	return khz ? khz : pit_hpet_ptimer_calibrate_cpu();
}
void recalibrate_cpu_khz(void)
{
#ifndef CONFIG_SMP
......@@ -1307,7 +1340,7 @@ static void tsc_refine_calibration_work(struct work_struct *work)
static int __init init_tsc_clocksource(void)
{
if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
if (!boot_cpu_has(X86_FEATURE_TSC) || !tsc_khz)
return 0;
if (tsc_unstable)
......@@ -1341,40 +1374,22 @@ static int __init init_tsc_clocksource(void)
*/
device_initcall(init_tsc_clocksource);
void __init tsc_early_delay_calibrate(void)
static bool __init determine_cpu_tsc_frequencies(bool early)
{
unsigned long lpj;
if (!boot_cpu_has(X86_FEATURE_TSC))
return;
cpu_khz = x86_platform.calibrate_cpu();
tsc_khz = x86_platform.calibrate_tsc();
tsc_khz = tsc_khz ? : cpu_khz;
if (!tsc_khz)
return;
lpj = tsc_khz * 1000;
do_div(lpj, HZ);
loops_per_jiffy = lpj;
}
void __init tsc_init(void)
{
u64 lpj, cyc;
int cpu;
if (!boot_cpu_has(X86_FEATURE_TSC)) {
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
/* Make sure that cpu and tsc are not already calibrated */
WARN_ON(cpu_khz || tsc_khz);
if (early) {
cpu_khz = x86_platform.calibrate_cpu();
tsc_khz = x86_platform.calibrate_tsc();
} else {
/* We should not be here with non-native cpu calibration */
WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu);
cpu_khz = pit_hpet_ptimer_calibrate_cpu();
}
cpu_khz = x86_platform.calibrate_cpu();
tsc_khz = x86_platform.calibrate_tsc();
/*
* Trust non-zero tsc_khz as authorative,
* Trust non-zero tsc_khz as authoritative,
* and use it to sanity check cpu_khz,
* which will be off if system timer is off.
*/
......@@ -1383,52 +1398,78 @@ void __init tsc_init(void)
else if (abs(cpu_khz - tsc_khz) * 10 > tsc_khz)
cpu_khz = tsc_khz;
if (!tsc_khz) {
mark_tsc_unstable("could not calculate TSC khz");
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
}
if (tsc_khz == 0)
return false;
pr_info("Detected %lu.%03lu MHz processor\n",
(unsigned long)cpu_khz / 1000,
(unsigned long)cpu_khz % 1000);
(unsigned long)cpu_khz / KHZ,
(unsigned long)cpu_khz % KHZ);
if (cpu_khz != tsc_khz) {
pr_info("Detected %lu.%03lu MHz TSC",
(unsigned long)tsc_khz / 1000,
(unsigned long)tsc_khz % 1000);
(unsigned long)tsc_khz / KHZ,
(unsigned long)tsc_khz % KHZ);
}
return true;
}
/*
 * Derive loops per jiffy from the TSC frequency: tsc_khz * KHZ / HZ.
 *
 * The product is formed in 64 bits. tsc_khz is an unsigned int, so a
 * plain "tsc_khz * KHZ" is evaluated in 32-bit arithmetic and wraps once
 * the result exceeds 32 bits; an unsigned long temporary cannot hold it
 * on 32-bit builds either. do_div() also expects a 64-bit dividend.
 *
 * Return: the quotient, converted to unsigned long.
 */
static unsigned long __init get_loops_per_jiffy(void)
{
	unsigned long long lpj = (unsigned long long)tsc_khz * KHZ;

	do_div(lpj, HZ);
	return lpj;
}
/*
 * Run the three steps that switch on TSC usage, in this order:
 * check/store TSC ADJUST, initialize the boot CPU's cyc2ns data, then
 * enable the __use_tsc static key.
 * NOTE(review): presumably __use_tsc is what lets native_sched_clock()
 * read the TSC - confirm against native_sched_clock().
 */
static void __init tsc_enable_sched_clock(void)
{
/* Sanitize TSC ADJUST before cyc2ns gets initialized */
tsc_store_and_check_tsc_adjust(true);
cyc2ns_init_boot_cpu();
static_branch_enable(&__use_tsc);
}
/*
 * Early TSC setup. Does nothing unless the boot CPU has a TSC and
 * determine_cpu_tsc_frequencies(true) succeeds; in that case
 * loops_per_jiffy is set from get_loops_per_jiffy() and
 * tsc_enable_sched_clock() is called.
 */
void __init tsc_early_init(void)
{
	if (boot_cpu_has(X86_FEATURE_TSC) && determine_cpu_tsc_frequencies(true)) {
		loops_per_jiffy = get_loops_per_jiffy();
		tsc_enable_sched_clock();
	}
}
void __init tsc_init(void)
{
/*
* Secondary CPUs do not run through tsc_init(), so set up
* all the scale factors for all CPUs, assuming the same
* speed as the bootup CPU. (cpufreq notifiers will fix this
* up if their speed diverges)
* native_calibrate_cpu_early can only calibrate using methods that are
* available early in boot.
*/
cyc = rdtsc();
for_each_possible_cpu(cpu) {
cyc2ns_init(cpu);
set_cyc2ns_scale(tsc_khz, cpu, cyc);
}
if (x86_platform.calibrate_cpu == native_calibrate_cpu_early)
x86_platform.calibrate_cpu = native_calibrate_cpu;
if (tsc_disabled > 0)
if (!boot_cpu_has(X86_FEATURE_TSC)) {
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
}
/* now allow native_sched_clock() to use rdtsc */
if (!tsc_khz) {
/* We failed to determine frequencies earlier, try again */
if (!determine_cpu_tsc_frequencies(false)) {
mark_tsc_unstable("could not calculate TSC khz");
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
}
tsc_enable_sched_clock();
}
tsc_disabled = 0;
static_branch_enable(&__use_tsc);
cyc2ns_init_secondary_cpus();
if (!no_sched_irq_time)
enable_sched_clock_irqtime();
lpj = ((u64)tsc_khz * 1000);
do_div(lpj, HZ);
lpj_fine = lpj;
lpj_fine = get_loops_per_jiffy();
use_tsc_delay();
check_system_tsc_reliable();
......@@ -1455,7 +1496,7 @@ unsigned long calibrate_delay_is_known(void)
int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC);
const struct cpumask *mask = topology_core_cpumask(cpu);
if (tsc_disabled || !constant_tsc || !mask)
if (!constant_tsc || !mask)
return 0;
sibling = cpumask_any_but(mask, cpu);
......
// SPDX-License-Identifier: GPL-2.0
/*
* tsc_msr.c - TSC frequency enumeration via MSR
* TSC frequency enumeration via MSR
*
* Copyright (C) 2013 Intel Corporation
* Copyright (C) 2013, 2018 Intel Corporation
* Author: Bin Gao <bin.gao@intel.com>
*
* This file is released under the GPLv2.
*/
#include <linux/kernel.h>
#include <asm/processor.h>
#include <asm/setup.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/msr.h>
#include <asm/param.h>
#include <asm/tsc.h>
#define MAX_NUM_FREQS 9
......@@ -23,44 +25,48 @@
* field msr_plat does.
*/
struct freq_desc {
u8 x86_family; /* CPU family */
u8 x86_model; /* model */
u8 msr_plat; /* 1: use MSR_PLATFORM_INFO, 0: MSR_IA32_PERF_STATUS */
u32 freqs[MAX_NUM_FREQS];
};
static struct freq_desc freq_desc_tables[] = {
/* PNW */
{ 6, 0x27, 0, { 0, 0, 0, 0, 0, 99840, 0, 83200 } },
/* CLV+ */
{ 6, 0x35, 0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 } },
/* TNG - Intel Atom processor Z3400 series */
{ 6, 0x4a, 1, { 0, 100000, 133300, 0, 0, 0, 0, 0 } },
/* VLV2 - Intel Atom processor E3000, Z3600, Z3700 series */
{ 6, 0x37, 1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 } },
/* ANN - Intel Atom processor Z3500 series */
{ 6, 0x5a, 1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 } },
/* AMT - Intel Atom processor X7-Z8000 and X5-Z8000 series */
{ 6, 0x4c, 1, { 83300, 100000, 133300, 116700,
80000, 93300, 90000, 88900, 87500 } },
/*
* Penwell and Clovertrail use spread spectrum clock,
* so the freq number is not exactly the same as reported
* by MSR based on SDM.
*/
static const struct freq_desc freq_desc_pnw = {
0, { 0, 0, 0, 0, 0, 99840, 0, 83200 }
};
static int match_cpu(u8 family, u8 model)
{
int i;
static const struct freq_desc freq_desc_clv = {
0, { 0, 133200, 0, 0, 0, 99840, 0, 83200 }
};
for (i = 0; i < ARRAY_SIZE(freq_desc_tables); i++) {
if ((family == freq_desc_tables[i].x86_family) &&
(model == freq_desc_tables[i].x86_model))
return i;
}
static const struct freq_desc freq_desc_byt = {
1, { 83300, 100000, 133300, 116700, 80000, 0, 0, 0 }
};
return -1;
}
static const struct freq_desc freq_desc_cht = {
1, { 83300, 100000, 133300, 116700, 80000, 93300, 90000, 88900, 87500 }
};
/* Map CPU reference clock freq ID(0-7) to CPU reference clock freq(KHz) */
#define id_to_freq(cpu_index, freq_id) \
(freq_desc_tables[cpu_index].freqs[freq_id])
static const struct freq_desc freq_desc_tng = {
1, { 0, 100000, 133300, 0, 0, 0, 0, 0 }
};
static const struct freq_desc freq_desc_ann = {
1, { 83300, 100000, 133300, 100000, 0, 0, 0, 0 }
};
static const struct x86_cpu_id tsc_msr_cpu_ids[] = {
INTEL_CPU_FAM6(ATOM_PENWELL, freq_desc_pnw),
INTEL_CPU_FAM6(ATOM_CLOVERVIEW, freq_desc_clv),
INTEL_CPU_FAM6(ATOM_SILVERMONT1, freq_desc_byt),
INTEL_CPU_FAM6(ATOM_AIRMONT, freq_desc_cht),
INTEL_CPU_FAM6(ATOM_MERRIFIELD, freq_desc_tng),
INTEL_CPU_FAM6(ATOM_MOOREFIELD, freq_desc_ann),
{}
};
/*
* MSR-based CPU/TSC frequency discovery for certain CPUs.
......@@ -70,18 +76,17 @@ static int match_cpu(u8 family, u8 model)
*/
unsigned long cpu_khz_from_msr(void)
{
u32 lo, hi, ratio, freq_id, freq;
u32 lo, hi, ratio, freq;
const struct freq_desc *freq_desc;
const struct x86_cpu_id *id;
unsigned long res;
int cpu_index;
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return 0;
cpu_index = match_cpu(boot_cpu_data.x86, boot_cpu_data.x86_model);
if (cpu_index < 0)
id = x86_match_cpu(tsc_msr_cpu_ids);
if (!id)
return 0;
if (freq_desc_tables[cpu_index].msr_plat) {
freq_desc = (struct freq_desc *)id->driver_data;
if (freq_desc->msr_plat) {
rdmsr(MSR_PLATFORM_INFO, lo, hi);
ratio = (lo >> 8) & 0xff;
} else {
......@@ -91,8 +96,9 @@ unsigned long cpu_khz_from_msr(void)
/* Get FSB FREQ ID */
rdmsr(MSR_FSB_FREQ, lo, hi);
freq_id = lo & 0x7;
freq = id_to_freq(cpu_index, freq_id);
/* Map CPU reference clock freq ID(0-7) to CPU reference clock freq(KHz) */
freq = freq_desc->freqs[lo & 0x7];
/* TSC frequency = maximum resolved freq * maximum resolved bus ratio */
res = freq * ratio;
......
......@@ -109,7 +109,7 @@ struct x86_cpuinit_ops x86_cpuinit = {
static void default_nmi_init(void) { };
struct x86_platform_ops x86_platform __ro_after_init = {
.calibrate_cpu = native_calibrate_cpu,
.calibrate_cpu = native_calibrate_cpu_early,
.calibrate_tsc = native_calibrate_tsc,
.get_wallclock = mach_get_cmos_time,
.set_wallclock = mach_set_rtc_mmss,
......
obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfld.o pwr.o
obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o pwr.o
# SFI specific code
ifdef CONFIG_X86_INTEL_MID
......
......@@ -36,8 +36,6 @@
#include <asm/apb_timer.h>
#include <asm/reboot.h>
#include "intel_mid_weak_decls.h"
/*
* the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock,
* cmdline option x86_intel_mid_timer can be used to override the configuration
......@@ -61,10 +59,6 @@
enum intel_mid_timer_options intel_mid_timer_options;
/* intel_mid_ops to store sub arch ops */
static struct intel_mid_ops *intel_mid_ops;
/* getter function for sub arch ops*/
static void *(*get_intel_mid_ops[])(void) = INTEL_MID_OPS_INIT;
enum intel_mid_cpu_type __intel_mid_cpu_chip;
EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip);
......@@ -82,11 +76,6 @@ static void intel_mid_reboot(void)
intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
}
static unsigned long __init intel_mid_calibrate_tsc(void)
{
return 0;
}
static void __init intel_mid_setup_bp_timer(void)
{
apbt_time_init();
......@@ -133,6 +122,7 @@ static void intel_mid_arch_setup(void)
case 0x3C:
case 0x4A:
__intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_TANGIER;
x86_platform.legacy.rtc = 1;
break;
case 0x27:
default:
......@@ -140,17 +130,7 @@ static void intel_mid_arch_setup(void)
break;
}
if (__intel_mid_cpu_chip < MAX_CPU_OPS(get_intel_mid_ops))
intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip]();
else {
intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL]();
pr_info("ARCH: Unknown SoC, assuming Penwell!\n");
}
out:
if (intel_mid_ops->arch_setup)
intel_mid_ops->arch_setup();
/*
* Intel MID platforms are using explicitly defined regulators.
*
......@@ -191,7 +171,6 @@ void __init x86_intel_mid_early_setup(void)
x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
x86_platform.calibrate_tsc = intel_mid_calibrate_tsc;
x86_platform.get_nmi_reason = intel_mid_get_nmi_reason;
x86_init.pci.arch_init = intel_mid_pci_init;
......
/*
* intel_mid_weak_decls.h: Weak declarations of intel-mid.c
*
* (C) Copyright 2013 Intel Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
/* For every CPU addition a new get_<cpuname>_ops interface needs
* to be added.
*/
extern void *get_penwell_ops(void);
extern void *get_cloverview_ops(void);
extern void *get_tangier_ops(void);
/*
* mfld.c: Intel Medfield platform setup code
*
* (C) Copyright 2013 Intel Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
#include <linux/init.h>
#include <asm/apic.h>
#include <asm/intel-mid.h>
#include <asm/intel_mid_vrtc.h>
#include "intel_mid_weak_decls.h"
/*
 * Compute the TSC frequency from two MSRs:
 *  - MSR_IA32_PERF_STATUS: ratio, taken from bits 8..12 of the high
 *    word; a zero ratio is reported and replaced by 16
 *  - MSR_FSB_FREQ: low three bits all set selects FSB_FREQ_83SKU,
 *    anything else FSB_FREQ_100SKU
 *
 * Also sets lapic_timer_frequency from the selected bus clock and forces
 * the TSC_KNOWN_FREQ and TSC_RELIABLE CPU capabilities.
 *
 * Return: ratio * bus clock.
 */
static unsigned long __init mfld_calibrate_tsc(void)
{
	u32 perf_lo, perf_hi, fsb_lo, fsb_hi;
	u32 ratio, fsb;
	unsigned long tsc_freq;

	rdmsr(MSR_IA32_PERF_STATUS, perf_lo, perf_hi);
	pr_debug("IA32 perf status is 0x%x, 0x%0x\n", perf_lo, perf_hi);

	ratio = (perf_hi >> 8) & 0x1f;
	pr_debug("ratio is %d\n", ratio);
	if (ratio == 0) {
		pr_err("read a zero ratio, should be incorrect!\n");
		pr_err("force tsc ratio to 16 ...\n");
		ratio = 16;
	}

	rdmsr(MSR_FSB_FREQ, fsb_lo, fsb_hi);
	fsb = ((fsb_lo & 0x7) == 0x7) ? FSB_FREQ_83SKU : FSB_FREQ_100SKU;

	tsc_freq = ratio * fsb;
	pr_debug("read penwell tsc %lu khz\n", tsc_freq);

	lapic_timer_frequency = fsb * 1000 / HZ;

	/*
	 * TSC on Intel Atom SoCs is reliable and of known frequency.
	 * See tsc_msr.c for details.
	 */
	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);

	return tsc_freq;
}
/* Install mfld_calibrate_tsc() as the x86_platform.calibrate_tsc hook. */
static void __init penwell_arch_setup(void)
{
x86_platform.calibrate_tsc = mfld_calibrate_tsc;
}
/*
 * Sub-arch ops whose only populated hook is penwell_arch_setup(); returned
 * by both get_penwell_ops() and get_cloverview_ops().
 */
static struct intel_mid_ops penwell_ops = {
.arch_setup = penwell_arch_setup,
};
/* Return the Penwell ops table as an untyped pointer. */
void *get_penwell_ops(void)
{
return &penwell_ops;
}
/* Cloverview has no ops table of its own here; it returns the Penwell one. */
void *get_cloverview_ops(void)
{
return &penwell_ops;
}
/*
* Intel Merrifield platform specific setup code
*
* (C) Copyright 2013 Intel Corporation
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
#include <linux/init.h>
#include <asm/apic.h>
#include <asm/intel-mid.h>
#include "intel_mid_weak_decls.h"
/*
 * tangier_calibrate_tsc - compute the TSC frequency from MSRs instead of
 * measuring it.
 *
 * TSC = ratio * FSB, where the ratio is bits 15:8 of the low word of
 * MSR_PLATFORM_INFO and the FSB frequency is selected by the low three bits
 * of MSR_FSB_FREQ. As side effects, lapic_timer_frequency is set from the
 * FSB value and the TSC_KNOWN_FREQ / TSC_RELIABLE capabilities are forced on.
 *
 * Returns ratio * fsb (logged as KHz).
 */
static unsigned long __init tangier_calibrate_tsc(void)
{
	unsigned long fast_calibrate;
	u32 lo, hi, ratio, fsb, bus_freq;

	/* *********************** */
	/* Compute TSC:Ratio * FSB */
	/* *********************** */

	/* Compute Ratio */
	rdmsr(MSR_PLATFORM_INFO, lo, hi);
	pr_debug("IA32 PLATFORM_INFO is 0x%x : %x\n", hi, lo);

	ratio = (lo >> 8) & 0xFF;
	pr_debug("ratio is %d\n", ratio);
	if (!ratio) {
		/* A zero ratio would yield a zero frequency; use a fixed fallback. */
		pr_err("Read a zero ratio, force tsc ratio to 4 ...\n");
		ratio = 4;
	}

	/* Compute FSB */
	rdmsr(MSR_FSB_FREQ, lo, hi);
	pr_debug("Actual FSB frequency detected by SOC 0x%x : %x\n",
		 hi, lo);

	bus_freq = lo & 0x7;
	pr_debug("bus_freq = 0x%x\n", bus_freq);

	switch (bus_freq) {
	case 0:
	case 1:
		fsb = FSB_FREQ_100SKU;
		break;
	case 2:
		fsb = FSB_FREQ_133SKU;
		break;
	case 3:
		fsb = FSB_FREQ_167SKU;
		break;
	case 4:
		fsb = FSB_FREQ_83SKU;
		break;
	case 5:
		fsb = FSB_FREQ_400SKU;
		break;
	case 6:
		fsb = FSB_FREQ_267SKU;
		break;
	case 7:
		fsb = FSB_FREQ_333SKU;
		break;
	default:
		/*
		 * Cannot happen while bus_freq is masked to three bits.
		 * Kept as a recoverable fallback rather than a BUG(), which
		 * would make the assignment below dead code.
		 */
		pr_err("Invalid bus_freq! Setting to minimal value!\n");
		fsb = FSB_FREQ_100SKU;
		break;
	}

	/* TSC = FSB Freq * Resolved HFM Ratio */
	fast_calibrate = ratio * fsb;
	pr_debug("calculate tangier tsc %lu KHz\n", fast_calibrate);

	/* ************************************ */
	/* Calculate Local APIC Timer Frequency */
	/* ************************************ */
	lapic_timer_frequency = (fsb * 1000) / HZ;

	pr_debug("Setting lapic_timer_frequency = %d\n",
		 lapic_timer_frequency);

	/*
	 * TSC on Intel Atom SoCs is reliable and of known frequency.
	 * See tsc_msr.c for details.
	 */
	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);

	return fast_calibrate;
}
/*
 * Install tangier_calibrate_tsc() as the platform's TSC calibration hook and
 * set x86_platform.legacy.rtc to 1.
 * NOTE(review): presumably the flag marks a legacy RTC as present -- confirm
 * against the x86_legacy_features definition.
 */
static void __init tangier_arch_setup(void)
{
x86_platform.calibrate_tsc = tangier_calibrate_tsc;
x86_platform.legacy.rtc = 1;
}
/* Tangier platform ops; the only hook populated here is arch_setup. */
static struct intel_mid_ops tangier_ops = {
.arch_setup = tangier_arch_setup,
};
/* Return the Tangier ops table as an untyped pointer. */
void *get_tangier_ops(void)
{
return &tangier_ops;
}
......@@ -119,6 +119,27 @@ static void __init xen_banner(void)
version >> 16, version & 0xffff, extra.extraversion,
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
}
/*
 * Early platform init for a Xen PV guest.
 *
 * Maps the shared_info page named in xen_start_info at the
 * FIX_PARAVIRT_BOOTMAP fixmap slot, points HYPERVISOR_shared_info at that
 * mapping, resets the vcpu info for CPU 0 and then installs the Xen time ops.
 * The time ops are installed last because they rely on the state set up by
 * the earlier steps.
 */
static void __init xen_pv_init_platform(void)
{
set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
HYPERVISOR_shared_info = (void *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
/* xen clock uses per-cpu vcpu_info, need to init it for boot cpu */
xen_vcpu_info_reset(0);
/* pvclock is in shared info area */
xen_init_time_ops();
}
/*
 * Late guest init for a Xen PV guest.
 *
 * Does work only when CONFIG_SMP is not set, in which case it calls
 * xen_setup_vcpu_info_placement(); on SMP builds the body is empty.
 */
static void __init xen_pv_guest_late_init(void)
{
#ifndef CONFIG_SMP
/* Setup shared vcpu info for non-smp configurations */
xen_setup_vcpu_info_placement();
#endif
}
/* Check if running on Xen version (major, minor) or later */
bool
xen_running_on_version_or_later(unsigned int major, unsigned int minor)
......@@ -947,34 +968,8 @@ static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
xen_write_msr_safe(msr, low, high);
}
void xen_setup_shared_info(void)
{
set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
HYPERVISOR_shared_info =
(struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
xen_setup_mfn_list_list();
if (system_state == SYSTEM_BOOTING) {
#ifndef CONFIG_SMP
/*
* In UP this is as good a place as any to set up shared info.
* Limit this to boot only, at restore vcpu setup is done via
* xen_vcpu_restore().
*/
xen_setup_vcpu_info_placement();
#endif
/*
* Now that shared info is set up we can start using routines
* that point to pvclock area.
*/
xen_init_time_ops();
}
}
/* This is called once we have the cpu_possible_mask */
void __ref xen_setup_vcpu_info_placement(void)
void __init xen_setup_vcpu_info_placement(void)
{
int cpu;
......@@ -1228,6 +1223,8 @@ asmlinkage __visible void __init xen_start_kernel(void)
x86_init.irqs.intr_mode_init = x86_init_noop;
x86_init.oem.arch_setup = xen_arch_setup;
x86_init.oem.banner = xen_banner;
x86_init.hyper.init_platform = xen_pv_init_platform;
x86_init.hyper.guest_late_init = xen_pv_guest_late_init;
/*
* Set up some pagetable state before starting to set any ptes.
......
......@@ -1230,8 +1230,7 @@ static void __init xen_pagetable_p2m_free(void)
* We roundup to the PMD, which means that if anybody at this stage is
* using the __ka address of xen_start_info or
* xen_start_info->shared_info they are going to crash. Fortunately
* we have already revectored in xen_setup_kernel_pagetable and in
* xen_setup_shared_info.
* we have already revectored in xen_setup_kernel_pagetable.
*/
size = roundup(size, PMD_SIZE);
......@@ -1292,8 +1291,7 @@ static void __init xen_pagetable_init(void)
/* Remap memory freed due to conflicts with E820 map */
xen_remap_memory();
xen_setup_shared_info();
xen_setup_mfn_list_list();
}
static void xen_write_cr2(unsigned long cr2)
{
......
......@@ -27,8 +27,9 @@ void xen_pv_pre_suspend(void)
void xen_pv_post_suspend(int suspend_cancelled)
{
xen_build_mfn_list_list();
xen_setup_shared_info();
set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
HYPERVISOR_shared_info = (void *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
xen_setup_mfn_list_list();
if (suspend_cancelled) {
xen_start_info->store_mfn =
......
......@@ -31,6 +31,8 @@
/* Xen may fire a timer up to this many ns early */
#define TIMER_SLOP 100000
static u64 xen_sched_clock_offset __read_mostly;
/* Get the TSC speed from Xen */
static unsigned long xen_tsc_khz(void)
{
......@@ -40,7 +42,7 @@ static unsigned long xen_tsc_khz(void)
return pvclock_tsc_khz(info);
}
u64 xen_clocksource_read(void)
static u64 xen_clocksource_read(void)
{
struct pvclock_vcpu_time_info *src;
u64 ret;
......@@ -57,6 +59,11 @@ static u64 xen_clocksource_get_cycles(struct clocksource *cs)
return xen_clocksource_read();
}
/*
 * Scheduler clock for Xen: the current Xen clocksource reading rebased by
 * xen_sched_clock_offset, so reported time is relative to the moment the
 * offset was captured.
 */
static u64 xen_sched_clock(void)
{
	u64 now = xen_clocksource_read();

	return now - xen_sched_clock_offset;
}
static void xen_read_wallclock(struct timespec64 *ts)
{
struct shared_info *s = HYPERVISOR_shared_info;
......@@ -367,7 +374,7 @@ void xen_timer_resume(void)
}
static const struct pv_time_ops xen_time_ops __initconst = {
.sched_clock = xen_clocksource_read,
.sched_clock = xen_sched_clock,
.steal_clock = xen_steal_clock,
};
......@@ -503,8 +510,9 @@ static void __init xen_time_init(void)
pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
}
void __ref xen_init_time_ops(void)
void __init xen_init_time_ops(void)
{
xen_sched_clock_offset = xen_clocksource_read();
pv_time_ops = xen_time_ops;
x86_init.timers.timer_init = xen_time_init;
......@@ -542,11 +550,11 @@ void __init xen_hvm_init_time_ops(void)
return;
if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
printk(KERN_INFO "Xen doesn't support pvclock on HVM,"
"disable pv timer\n");
pr_info("Xen doesn't support pvclock on HVM, disable pv timer");
return;
}
xen_sched_clock_offset = xen_clocksource_read();
pv_time_ops = xen_time_ops;
x86_init.timers.setup_percpu_clockev = xen_time_init;
x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
......
......@@ -31,7 +31,6 @@ extern struct shared_info xen_dummy_shared_info;
extern struct shared_info *HYPERVISOR_shared_info;
void xen_setup_mfn_list_list(void);
void xen_setup_shared_info(void);
void xen_build_mfn_list_list(void);
void xen_setup_machphys_mapping(void);
void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
......@@ -68,12 +67,11 @@ void xen_init_irq_ops(void);
void xen_setup_timer(int cpu);
void xen_setup_runstate_info(int cpu);
void xen_teardown_timer(int cpu);
u64 xen_clocksource_read(void);
void xen_setup_cpu_clockevents(void);
void xen_save_time_memory_area(void);
void xen_restore_time_memory_area(void);
void __ref xen_init_time_ops(void);
void __init xen_hvm_init_time_ops(void);
void xen_init_time_ops(void);
void xen_hvm_init_time_ops(void);
irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
......
......@@ -259,6 +259,6 @@ static int __init tegra20_init_rtc(struct device_node *np)
else
clk_prepare_enable(clk);
return register_persistent_clock(NULL, tegra_read_persistent_clock64);
return register_persistent_clock(tegra_read_persistent_clock64);
}
TIMER_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc);
......@@ -9,17 +9,16 @@
#define LINUX_SCHED_CLOCK
#ifdef CONFIG_GENERIC_SCHED_CLOCK
extern void sched_clock_postinit(void);
extern void generic_sched_clock_init(void);
extern void sched_clock_register(u64 (*read)(void), int bits,
unsigned long rate);
#else
static inline void sched_clock_postinit(void) { }
static inline void generic_sched_clock_init(void) { }
static inline void sched_clock_register(u64 (*read)(void), int bits,
unsigned long rate)
{
;
}
#endif
......
......@@ -243,7 +243,8 @@ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot);
extern int persistent_clock_is_local;
extern void read_persistent_clock64(struct timespec64 *ts);
extern void read_boot_clock64(struct timespec64 *ts);
/*
 * Read the persistent wall clock together with the offset from boot.
 * The name must match the __weak default definition that timekeeping_init()
 * calls; a differently named prototype leaves that definition undeclared.
 */
void read_persistent_wall_and_boot_offset(struct timespec64 *wall_clock,
					  struct timespec64 *boot_offset);
extern int update_persistent_clock64(struct timespec64 now);
/*
......
......@@ -79,7 +79,7 @@
#include <linux/pti.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/sched_clock.h>
#include <linux/sched/clock.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/context_tracking.h>
......@@ -642,7 +642,6 @@ asmlinkage __visible void __init start_kernel(void)
softirq_init();
timekeeping_init();
time_init();
sched_clock_postinit();
printk_safe_init();
perf_event_init();
profile_init();
......@@ -697,6 +696,7 @@ asmlinkage __visible void __init start_kernel(void)
acpi_early_init();
if (late_time_init)
late_time_init();
sched_clock_init();
calibrate_delay();
pid_idr_init();
anon_vma_init();
......
......@@ -53,6 +53,7 @@
*
*/
#include "sched.h"
#include <linux/sched_clock.h>
/*
* Scheduler clock - returns current time in nanosec units.
......@@ -66,12 +67,7 @@ unsigned long long __weak sched_clock(void)
}
EXPORT_SYMBOL_GPL(sched_clock);
__read_mostly int sched_clock_running;
void sched_clock_init(void)
{
sched_clock_running = 1;
}
static DEFINE_STATIC_KEY_FALSE(sched_clock_running);
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
/*
......@@ -195,17 +191,40 @@ void clear_sched_clock_stable(void)
smp_mb(); /* matches sched_clock_init_late() */
if (sched_clock_running == 2)
if (static_key_count(&sched_clock_running.key) == 2)
__clear_sched_clock_stable();
}
/*
 * Recompute __gtod_offset from this CPU's sched_clock_data.
 *
 * After __scd_stamp() updates scd, the offset is set to
 * (tick_raw + __sched_clock_offset) - tick_gtod.
 * NOTE(review): presumably __scd_stamp() samples both tick_raw and tick_gtod;
 * confirm against its definition. Both visible callers invoke this with
 * interrupts disabled.
 */
static void __sched_clock_gtod_offset(void)
{
struct sched_clock_data *scd = this_scd();
__scd_stamp(scd);
__gtod_offset = (scd->tick_raw + __sched_clock_offset) - scd->tick_gtod;
}
/*
 * Boot-time enable of the scheduler clock.
 *
 * Computes __gtod_offset with interrupts disabled, then increments the
 * sched_clock_running static key.
 */
void __init sched_clock_init(void)
{
/*
* Set __gtod_offset such that once we mark sched_clock_running,
* sched_clock_tick() continues where sched_clock() left off.
*
* Even if TSC is buggered, we're still UP at this point so it
* can't really be out of sync.
*/
local_irq_disable();
__sched_clock_gtod_offset();
local_irq_enable();
static_branch_inc(&sched_clock_running);
}
/*
* We run this as late_initcall() such that it runs after all built-in drivers,
* notably: acpi_processor and intel_idle, which can mark the TSC as unstable.
*/
static int __init sched_clock_init_late(void)
{
sched_clock_running = 2;
static_branch_inc(&sched_clock_running);
/*
* Ensure that it is impossible to not do a static_key update.
*
......@@ -350,8 +369,8 @@ u64 sched_clock_cpu(int cpu)
if (sched_clock_stable())
return sched_clock() + __sched_clock_offset;
if (unlikely(!sched_clock_running))
return 0ull;
if (!static_branch_unlikely(&sched_clock_running))
return sched_clock();
preempt_disable_notrace();
scd = cpu_sdc(cpu);
......@@ -373,7 +392,7 @@ void sched_clock_tick(void)
if (sched_clock_stable())
return;
if (unlikely(!sched_clock_running))
if (!static_branch_unlikely(&sched_clock_running))
return;
lockdep_assert_irqs_disabled();
......@@ -385,8 +404,6 @@ void sched_clock_tick(void)
void sched_clock_tick_stable(void)
{
u64 gtod, clock;
if (!sched_clock_stable())
return;
......@@ -398,9 +415,7 @@ void sched_clock_tick_stable(void)
* TSC to be unstable, any computation will be computing crap.
*/
local_irq_disable();
gtod = ktime_get_ns();
clock = sched_clock();
__gtod_offset = (clock + __sched_clock_offset) - gtod;
__sched_clock_gtod_offset();
local_irq_enable();
}
......@@ -434,9 +449,17 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
/*
 * Boot-time enable of the scheduler clock for this configuration.
 *
 * Increments the sched_clock_running static key first, then calls
 * generic_sched_clock_init() with interrupts disabled.
 */
void __init sched_clock_init(void)
{
static_branch_inc(&sched_clock_running);
local_irq_disable();
generic_sched_clock_init();
local_irq_enable();
}
u64 sched_clock_cpu(int cpu)
{
if (unlikely(!sched_clock_running))
if (!static_branch_unlikely(&sched_clock_running))
return 0;
return sched_clock();
......
......@@ -5916,7 +5916,6 @@ void __init sched_init(void)
int i, j;
unsigned long alloc_size = 0, ptr;
sched_clock_init();
wait_bit_init();
#ifdef CONFIG_FAIR_GROUP_SCHED
......
......@@ -622,8 +622,6 @@ void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
#undef PU
}
extern __read_mostly int sched_clock_running;
static void print_cpu(struct seq_file *m, int cpu)
{
struct rq *rq = cpu_rq(cpu);
......
......@@ -237,7 +237,7 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
pr_debug("Registered %pF as sched_clock source\n", read);
}
void __init sched_clock_postinit(void)
void __init generic_sched_clock_init(void)
{
/*
* If no sched_clock() function has been provided at that point,
......
......@@ -17,6 +17,7 @@
#include <linux/nmi.h>
#include <linux/sched.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/clock.h>
#include <linux/syscore_ops.h>
#include <linux/clocksource.h>
#include <linux/jiffies.h>
......@@ -1505,18 +1506,23 @@ void __weak read_persistent_clock64(struct timespec64 *ts64)
}
/**
* read_boot_clock64 - Return time of the system start.
* read_persistent_wall_and_boot_offset - Read persistent clock, and also offset
* from the boot.
*
* Weak dummy function for arches that do not yet support it.
* Function to read the exact time the system has been started.
* Returns a timespec64 with tv_sec=0 and tv_nsec=0 if unsupported.
*
* XXX - Do be sure to remove it once all arches implement it.
* wall_time - current time as returned by persistent clock
* boot_offset - offset that is defined as wall_time - boot_time
* The default function calculates offset based on the current value of
* local_clock(). This way architectures that support sched_clock() but don't
* support dedicated boot time clock will provide the best estimate of the
* boot time.
*/
void __weak read_boot_clock64(struct timespec64 *ts)
void __weak __init
read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
struct timespec64 *boot_offset)
{
ts->tv_sec = 0;
ts->tv_nsec = 0;
read_persistent_clock64(wall_time);
*boot_offset = ns_to_timespec64(local_clock());
}
/*
......@@ -1542,28 +1548,29 @@ static bool persistent_clock_exists;
*/
void __init timekeeping_init(void)
{
struct timespec64 wall_time, boot_offset, wall_to_mono;
struct timekeeper *tk = &tk_core.timekeeper;
struct clocksource *clock;
unsigned long flags;
struct timespec64 now, boot, tmp;
read_persistent_clock64(&now);
if (!timespec64_valid_strict(&now)) {
pr_warn("WARNING: Persistent clock returned invalid value!\n"
" Check your CMOS/BIOS settings.\n");
now.tv_sec = 0;
now.tv_nsec = 0;
} else if (now.tv_sec || now.tv_nsec)
persistent_clock_exists = true;
read_boot_clock64(&boot);
if (!timespec64_valid_strict(&boot)) {
pr_warn("WARNING: Boot clock returned invalid value!\n"
" Check your CMOS/BIOS settings.\n");
boot.tv_sec = 0;
boot.tv_nsec = 0;
read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);
if (timespec64_valid_strict(&wall_time) &&
timespec64_to_ns(&wall_time) > 0) {
persistent_clock_exists = true;
} else if (timespec64_to_ns(&wall_time) != 0) {
pr_warn("Persistent clock returned invalid value");
wall_time = (struct timespec64){0};
}
if (timespec64_compare(&wall_time, &boot_offset) < 0)
boot_offset = (struct timespec64){0};
/*
* We want to set wall_to_mono so that the following is true:
* wall time + wall_to_mono = boot time
*/
wall_to_mono = timespec64_sub(boot_offset, wall_time);
raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&tk_core.seq);
ntp_init();
......@@ -1573,13 +1580,10 @@ void __init timekeeping_init(void)
clock->enable(clock);
tk_setup_internals(tk, clock);
tk_set_xtime(tk, &now);
tk_set_xtime(tk, &wall_time);
tk->raw_sec = 0;
if (boot.tv_sec == 0 && boot.tv_nsec == 0)
boot = tk_xtime(tk);
set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec);
tk_set_wall_to_mono(tk, tmp);
tk_set_wall_to_mono(tk, wall_to_mono);
timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment