Commit 64d6a120 authored by Linus Torvalds

Merge branch 'x86-hyperv-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 hyperv updates from Ingo Molnar:
 "Misc updates to the hyperv guest code:

   - Rework clockevents initialization to better support hibernation

   - Allow guests to enable InvariantTSC

   - Micro-optimize send_ipi_one"

* 'x86-hyperv-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/hyperv: Initialize clockevents earlier in CPU onlining
  x86/hyperv: Allow guests to enable InvariantTSC
  x86/hyperv: Micro-optimize send_ipi_one()
parents cd4771f7 4df4cb9e
...@@ -194,10 +194,20 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector) ...@@ -194,10 +194,20 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector)
static bool __send_ipi_one(int cpu, int vector) static bool __send_ipi_one(int cpu, int vector)
{ {
struct cpumask mask = CPU_MASK_NONE; int vp = hv_cpu_number_to_vp_number(cpu);
cpumask_set_cpu(cpu, &mask); trace_hyperv_send_ipi_one(cpu, vector);
return __send_ipi_mask(&mask, vector);
if (!hv_hypercall_pg || (vp == VP_INVAL))
return false;
if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
return false;
if (vp >= 64)
return __send_ipi_mask_ex(cpumask_of(cpu), vector);
return !hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp));
} }
static void hv_send_ipi(int cpu, int vector) static void hv_send_ipi(int cpu, int vector)
......
...@@ -311,6 +311,12 @@ void __init hyperv_init(void) ...@@ -311,6 +311,12 @@ void __init hyperv_init(void)
hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
/*
* Ignore any errors in setting up stimer clockevents
* as we can run with the LAPIC timer as a fallback.
*/
(void)hv_stimer_alloc();
hv_apic_init(); hv_apic_init();
x86_init.pci.arch_init = hv_pci_init; x86_init.pci.arch_init = hv_pci_init;
......
...@@ -86,6 +86,8 @@ ...@@ -86,6 +86,8 @@
#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11) #define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11)
/* AccessReenlightenmentControls privilege */ /* AccessReenlightenmentControls privilege */
#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13) #define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
/* AccessTscInvariantControls privilege */
#define HV_X64_ACCESS_TSC_INVARIANT BIT(15)
/* /*
* Feature identification: indicates which flags were specified at partition * Feature identification: indicates which flags were specified at partition
...@@ -278,6 +280,9 @@ ...@@ -278,6 +280,9 @@
#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 #define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 #define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
/* TSC invariant control */
#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
/* /*
* Declare the MSR used to setup pages used to communicate with the hypervisor. * Declare the MSR used to setup pages used to communicate with the hypervisor.
*/ */
......
...@@ -71,6 +71,21 @@ TRACE_EVENT(hyperv_send_ipi_mask, ...@@ -71,6 +71,21 @@ TRACE_EVENT(hyperv_send_ipi_mask,
__entry->ncpus, __entry->vector) __entry->ncpus, __entry->vector)
); );
TRACE_EVENT(hyperv_send_ipi_one,
TP_PROTO(int cpu,
int vector),
TP_ARGS(cpu, vector),
TP_STRUCT__entry(
__field(int, cpu)
__field(int, vector)
),
TP_fast_assign(__entry->cpu = cpu;
__entry->vector = vector;
),
TP_printk("cpu %d vector %x",
__entry->cpu, __entry->vector)
);
#endif /* CONFIG_HYPERV */ #endif /* CONFIG_HYPERV */
#undef TRACE_INCLUDE_PATH #undef TRACE_INCLUDE_PATH
......
...@@ -290,7 +290,12 @@ static void __init ms_hyperv_init_platform(void) ...@@ -290,7 +290,12 @@ static void __init ms_hyperv_init_platform(void)
machine_ops.shutdown = hv_machine_shutdown; machine_ops.shutdown = hv_machine_shutdown;
machine_ops.crash_shutdown = hv_machine_crash_shutdown; machine_ops.crash_shutdown = hv_machine_crash_shutdown;
#endif #endif
if (ms_hyperv.features & HV_X64_ACCESS_TSC_INVARIANT) {
wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1);
setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
} else {
mark_tsc_unstable("running on Hyper-V"); mark_tsc_unstable("running on Hyper-V");
}
/* /*
* Generation 2 instances don't support reading the NMI status from * Generation 2 instances don't support reading the NMI status from
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <linux/clocksource.h> #include <linux/clocksource.h>
#include <linux/sched_clock.h> #include <linux/sched_clock.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/cpuhotplug.h>
#include <clocksource/hyperv_timer.h> #include <clocksource/hyperv_timer.h>
#include <asm/hyperv-tlfs.h> #include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h> #include <asm/mshyperv.h>
...@@ -30,6 +31,15 @@ static u64 hv_sched_clock_offset __ro_after_init; ...@@ -30,6 +31,15 @@ static u64 hv_sched_clock_offset __ro_after_init;
* mechanism is used when running on older versions of Hyper-V * mechanism is used when running on older versions of Hyper-V
* that don't support Direct Mode. While Hyper-V provides * that don't support Direct Mode. While Hyper-V provides
* four stimer's per CPU, Linux uses only stimer0. * four stimer's per CPU, Linux uses only stimer0.
*
* Because Direct Mode does not require processing a VMbus
* message, stimer interrupts can be enabled earlier in the
* process of booting a CPU, and consistent with when timer
* interrupts are enabled for other clocksource drivers.
* However, for legacy versions of Hyper-V when Direct Mode
* is not enabled, setting up stimer interrupts must be
* delayed until VMbus is initialized and can process the
* interrupt message.
*/ */
static bool direct_mode_enabled; static bool direct_mode_enabled;
...@@ -102,17 +112,12 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt) ...@@ -102,17 +112,12 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt)
/* /*
* hv_stimer_init - Per-cpu initialization of the clockevent * hv_stimer_init - Per-cpu initialization of the clockevent
*/ */
void hv_stimer_init(unsigned int cpu) static int hv_stimer_init(unsigned int cpu)
{ {
struct clock_event_device *ce; struct clock_event_device *ce;
/* if (!hv_clock_event)
* Synthetic timers are always available except on old versions of return 0;
* Hyper-V on x86. In that case, just return as Linux will use a
* clocksource based on emulated PIT or LAPIC timer hardware.
*/
if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
return;
ce = per_cpu_ptr(hv_clock_event, cpu); ce = per_cpu_ptr(hv_clock_event, cpu);
ce->name = "Hyper-V clockevent"; ce->name = "Hyper-V clockevent";
...@@ -127,28 +132,55 @@ void hv_stimer_init(unsigned int cpu) ...@@ -127,28 +132,55 @@ void hv_stimer_init(unsigned int cpu)
HV_CLOCK_HZ, HV_CLOCK_HZ,
HV_MIN_DELTA_TICKS, HV_MIN_DELTA_TICKS,
HV_MAX_MAX_DELTA_TICKS); HV_MAX_MAX_DELTA_TICKS);
return 0;
} }
EXPORT_SYMBOL_GPL(hv_stimer_init);
/* /*
* hv_stimer_cleanup - Per-cpu cleanup of the clockevent * hv_stimer_cleanup - Per-cpu cleanup of the clockevent
*/ */
void hv_stimer_cleanup(unsigned int cpu) int hv_stimer_cleanup(unsigned int cpu)
{ {
struct clock_event_device *ce; struct clock_event_device *ce;
/* Turn off clockevent device */ if (!hv_clock_event)
if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) { return 0;
/*
* In the legacy case where Direct Mode is not enabled
* (which can only be on x86/64), stimer cleanup happens
* relatively early in the CPU offlining process. We
* must unbind the stimer-based clockevent device so
* that the LAPIC timer can take over until clockevents
* are no longer needed in the offlining process. Note
* that clockevents_unbind_device() eventually calls
* hv_ce_shutdown().
*
* The unbind should not be done when Direct Mode is
* enabled because we may be on an architecture where
* there are no other clockevent devices to fallback to.
*/
ce = per_cpu_ptr(hv_clock_event, cpu); ce = per_cpu_ptr(hv_clock_event, cpu);
if (direct_mode_enabled)
hv_ce_shutdown(ce); hv_ce_shutdown(ce);
} else
clockevents_unbind_device(ce, cpu);
return 0;
} }
EXPORT_SYMBOL_GPL(hv_stimer_cleanup); EXPORT_SYMBOL_GPL(hv_stimer_cleanup);
/* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */ /* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
int hv_stimer_alloc(int sint) int hv_stimer_alloc(void)
{ {
int ret; int ret = 0;
/*
* Synthetic timers are always available except on old versions of
* Hyper-V on x86. In that case, return an error as Linux will use a
* clockevent based on emulated LAPIC timer hardware.
*/
if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE))
return -EINVAL;
hv_clock_event = alloc_percpu(struct clock_event_device); hv_clock_event = alloc_percpu(struct clock_event_device);
if (!hv_clock_event) if (!hv_clock_event)
...@@ -159,22 +191,78 @@ int hv_stimer_alloc(int sint) ...@@ -159,22 +191,78 @@ int hv_stimer_alloc(int sint)
if (direct_mode_enabled) { if (direct_mode_enabled) {
ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector, ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
hv_stimer0_isr); hv_stimer0_isr);
if (ret) { if (ret)
goto free_percpu;
/*
* Since we are in Direct Mode, stimer initialization
* can be done now with a CPUHP value in the same range
* as other clockevent devices.
*/
ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING,
"clockevents/hyperv/stimer:starting",
hv_stimer_init, hv_stimer_cleanup);
if (ret < 0)
goto free_stimer0_irq;
}
return ret;
free_stimer0_irq:
hv_remove_stimer0_irq(stimer0_irq);
stimer0_irq = 0;
free_percpu:
free_percpu(hv_clock_event); free_percpu(hv_clock_event);
hv_clock_event = NULL; hv_clock_event = NULL;
return ret; return ret;
} }
} EXPORT_SYMBOL_GPL(hv_stimer_alloc);
/*
* hv_stimer_legacy_init -- Called from the VMbus driver to handle
* the case when Direct Mode is not enabled, and the stimer
* must be initialized late in the CPU onlining process.
*
*/
void hv_stimer_legacy_init(unsigned int cpu, int sint)
{
if (direct_mode_enabled)
return;
/*
* This function gets called by each vCPU, so setting the
* global stimer_message_sint value each time is conceptually
* not ideal, but the value passed in is always the same and
* it avoids introducing yet another interface into this
* clocksource driver just to set the sint in the legacy case.
*/
stimer0_message_sint = sint; stimer0_message_sint = sint;
return 0; (void)hv_stimer_init(cpu);
} }
EXPORT_SYMBOL_GPL(hv_stimer_alloc); EXPORT_SYMBOL_GPL(hv_stimer_legacy_init);
/*
* hv_stimer_legacy_cleanup -- Called from the VMbus driver to
* handle the case when Direct Mode is not enabled, and the
* stimer must be cleaned up early in the CPU offlining
* process.
*/
void hv_stimer_legacy_cleanup(unsigned int cpu)
{
if (direct_mode_enabled)
return;
(void)hv_stimer_cleanup(cpu);
}
EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup);
/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */ /* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */
void hv_stimer_free(void) void hv_stimer_free(void)
{ {
if (direct_mode_enabled && (stimer0_irq != 0)) { if (!hv_clock_event)
return;
if (direct_mode_enabled) {
cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING);
hv_remove_stimer0_irq(stimer0_irq); hv_remove_stimer0_irq(stimer0_irq);
stimer0_irq = 0; stimer0_irq = 0;
} }
...@@ -190,14 +278,20 @@ EXPORT_SYMBOL_GPL(hv_stimer_free); ...@@ -190,14 +278,20 @@ EXPORT_SYMBOL_GPL(hv_stimer_free);
void hv_stimer_global_cleanup(void) void hv_stimer_global_cleanup(void)
{ {
int cpu; int cpu;
struct clock_event_device *ce;
if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) { /*
* hv_stimer_legacy_cleanup() will stop the stimer if Direct
* Mode is not enabled, and fallback to the LAPIC timer.
*/
for_each_present_cpu(cpu) { for_each_present_cpu(cpu) {
ce = per_cpu_ptr(hv_clock_event, cpu); hv_stimer_legacy_cleanup(cpu);
clockevents_unbind_device(ce, cpu);
}
} }
/*
* If Direct Mode is enabled, the cpuhp teardown callback
* (hv_stimer_cleanup) will be run on all CPUs to stop the
* stimers.
*/
hv_stimer_free(); hv_stimer_free();
} }
EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
......
...@@ -202,7 +202,7 @@ int hv_synic_init(unsigned int cpu) ...@@ -202,7 +202,7 @@ int hv_synic_init(unsigned int cpu)
{ {
hv_synic_enable_regs(cpu); hv_synic_enable_regs(cpu);
hv_stimer_init(cpu); hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);
return 0; return 0;
} }
...@@ -277,7 +277,7 @@ int hv_synic_cleanup(unsigned int cpu) ...@@ -277,7 +277,7 @@ int hv_synic_cleanup(unsigned int cpu)
if (channel_found && vmbus_connection.conn_state == CONNECTED) if (channel_found && vmbus_connection.conn_state == CONNECTED)
return -EBUSY; return -EBUSY;
hv_stimer_cleanup(cpu); hv_stimer_legacy_cleanup(cpu);
hv_synic_disable_regs(cpu); hv_synic_disable_regs(cpu);
......
...@@ -1340,10 +1340,6 @@ static int vmbus_bus_init(void) ...@@ -1340,10 +1340,6 @@ static int vmbus_bus_init(void)
if (ret) if (ret)
goto err_alloc; goto err_alloc;
ret = hv_stimer_alloc(VMBUS_MESSAGE_SINT);
if (ret < 0)
goto err_alloc;
/* /*
* Initialize the per-cpu interrupt state and stimer state. * Initialize the per-cpu interrupt state and stimer state.
* Then connect to the host. * Then connect to the host.
...@@ -1400,9 +1396,8 @@ static int vmbus_bus_init(void) ...@@ -1400,9 +1396,8 @@ static int vmbus_bus_init(void)
err_connect: err_connect:
cpuhp_remove_state(hyperv_cpuhp_online); cpuhp_remove_state(hyperv_cpuhp_online);
err_cpuhp: err_cpuhp:
hv_stimer_free();
err_alloc:
hv_synic_free(); hv_synic_free();
err_alloc:
hv_remove_vmbus_irq(); hv_remove_vmbus_irq();
bus_unregister(&hv_bus); bus_unregister(&hv_bus);
...@@ -2315,20 +2310,23 @@ static void hv_crash_handler(struct pt_regs *regs) ...@@ -2315,20 +2310,23 @@ static void hv_crash_handler(struct pt_regs *regs)
static int hv_synic_suspend(void) static int hv_synic_suspend(void)
{ {
/* /*
* When we reach here, all the non-boot CPUs have been offlined, and * When we reach here, all the non-boot CPUs have been offlined.
* the stimers on them have been unbound in hv_synic_cleanup() -> * If we're in a legacy configuration where stimer Direct Mode is
* not enabled, the stimers on the non-boot CPUs have been unbound
* in hv_synic_cleanup() -> hv_stimer_legacy_cleanup() ->
* hv_stimer_cleanup() -> clockevents_unbind_device(). * hv_stimer_cleanup() -> clockevents_unbind_device().
* *
* hv_synic_suspend() only runs on CPU0 with interrupts disabled. Here * hv_synic_suspend() only runs on CPU0 with interrupts disabled.
* we do not unbind the stimer on CPU0 because: 1) it's unnecessary * Here we do not call hv_stimer_legacy_cleanup() on CPU0 because:
* because the interrupts remain disabled between syscore_suspend() * 1) it's unnecessary as interrupts remain disabled between
* and syscore_resume(): see create_image() and resume_target_kernel(); * syscore_suspend() and syscore_resume(): see create_image() and
* resume_target_kernel()
* 2) the stimer on CPU0 is automatically disabled later by * 2) the stimer on CPU0 is automatically disabled later by
* syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ... * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
* -> clockevents_shutdown() -> ... -> hv_ce_shutdown(); 3) a warning * -> clockevents_shutdown() -> ... -> hv_ce_shutdown()
* would be triggered if we call clockevents_unbind_device(), which * 3) a warning would be triggered if we call
* may sleep, in an interrupts-disabled context. So, we intentionally * clockevents_unbind_device(), which may sleep, in an
* don't call hv_stimer_cleanup(0) here. * interrupts-disabled context.
*/ */
hv_synic_disable_regs(0); hv_synic_disable_regs(0);
......
...@@ -21,10 +21,11 @@ ...@@ -21,10 +21,11 @@
#define HV_MIN_DELTA_TICKS 1 #define HV_MIN_DELTA_TICKS 1
/* Routines called by the VMbus driver */ /* Routines called by the VMbus driver */
extern int hv_stimer_alloc(int sint); extern int hv_stimer_alloc(void);
extern void hv_stimer_free(void); extern void hv_stimer_free(void);
extern void hv_stimer_init(unsigned int cpu); extern int hv_stimer_cleanup(unsigned int cpu);
extern void hv_stimer_cleanup(unsigned int cpu); extern void hv_stimer_legacy_init(unsigned int cpu, int sint);
extern void hv_stimer_legacy_cleanup(unsigned int cpu);
extern void hv_stimer_global_cleanup(void); extern void hv_stimer_global_cleanup(void);
extern void hv_stimer0_isr(void); extern void hv_stimer0_isr(void);
......
...@@ -129,6 +129,7 @@ enum cpuhp_state { ...@@ -129,6 +129,7 @@ enum cpuhp_state {
CPUHP_AP_ARC_TIMER_STARTING, CPUHP_AP_ARC_TIMER_STARTING,
CPUHP_AP_RISCV_TIMER_STARTING, CPUHP_AP_RISCV_TIMER_STARTING,
CPUHP_AP_CSKY_TIMER_STARTING, CPUHP_AP_CSKY_TIMER_STARTING,
CPUHP_AP_HYPERV_TIMER_STARTING,
CPUHP_AP_KVM_STARTING, CPUHP_AP_KVM_STARTING,
CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING, CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
CPUHP_AP_KVM_ARM_VGIC_STARTING, CPUHP_AP_KVM_ARM_VGIC_STARTING,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment