Commit dfe94d40 authored by Dexuan Cui, committed by Wei Liu

x86/hyperv: Fix kexec panic/hang issues

Currently the kexec kernel can panic or hang due to 2 causes:

1) hv_cpu_die() is not called upon kexec, so the hypervisor corrupts the
old VP Assist Pages when the kexec kernel runs. The same issue is fixed
for hibernation in commit 421f090c ("x86/hyperv: Suspend/resume the
VP assist page for hibernation"). Now fix it for kexec.

2) hyperv_cleanup() is called too early. In the kexec path, the other CPUs
are stopped in hv_machine_shutdown() -> native_machine_shutdown(), so
between hv_kexec_handler() and native_machine_shutdown(), the other CPUs
can still try to access the hypercall page and cause panic. The workaround
"hv_hypercall_pg = NULL;" in hyperv_cleanup() is unreliable. Move
hyperv_cleanup() to a better place.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/20201222065541.24312-1-decui@microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
parent e71ba945
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <asm/hyperv-tlfs.h> #include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h> #include <asm/mshyperv.h>
#include <asm/idtentry.h> #include <asm/idtentry.h>
#include <linux/kexec.h>
#include <linux/version.h> #include <linux/version.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/mm.h> #include <linux/mm.h>
...@@ -26,6 +27,8 @@ ...@@ -26,6 +27,8 @@
#include <linux/syscore_ops.h> #include <linux/syscore_ops.h>
#include <clocksource/hyperv_timer.h> #include <clocksource/hyperv_timer.h>
int hyperv_init_cpuhp;
void *hv_hypercall_pg; void *hv_hypercall_pg;
EXPORT_SYMBOL_GPL(hv_hypercall_pg); EXPORT_SYMBOL_GPL(hv_hypercall_pg);
...@@ -401,6 +404,7 @@ void __init hyperv_init(void) ...@@ -401,6 +404,7 @@ void __init hyperv_init(void)
register_syscore_ops(&hv_syscore_ops); register_syscore_ops(&hv_syscore_ops);
hyperv_init_cpuhp = cpuhp;
return; return;
remove_cpuhp_state: remove_cpuhp_state:
......
...@@ -74,6 +74,8 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {} ...@@ -74,6 +74,8 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {}
#if IS_ENABLED(CONFIG_HYPERV) #if IS_ENABLED(CONFIG_HYPERV)
extern int hyperv_init_cpuhp;
extern void *hv_hypercall_pg; extern void *hv_hypercall_pg;
extern void __percpu **hyperv_pcpu_input_arg; extern void __percpu **hyperv_pcpu_input_arg;
......
...@@ -135,14 +135,32 @@ static void hv_machine_shutdown(void) ...@@ -135,14 +135,32 @@ static void hv_machine_shutdown(void)
{ {
if (kexec_in_progress && hv_kexec_handler) if (kexec_in_progress && hv_kexec_handler)
hv_kexec_handler(); hv_kexec_handler();
/*
* Call hv_cpu_die() on all the CPUs, otherwise later the hypervisor
* corrupts the old VP Assist Pages and can crash the kexec kernel.
*/
if (kexec_in_progress && hyperv_init_cpuhp > 0)
cpuhp_remove_state(hyperv_init_cpuhp);
/* The function calls stop_other_cpus(). */
native_machine_shutdown(); native_machine_shutdown();
/* Disable the hypercall page when there is only 1 active CPU. */
if (kexec_in_progress)
hyperv_cleanup();
} }
static void hv_machine_crash_shutdown(struct pt_regs *regs) static void hv_machine_crash_shutdown(struct pt_regs *regs)
{ {
if (hv_crash_handler) if (hv_crash_handler)
hv_crash_handler(regs); hv_crash_handler(regs);
/* The function calls crash_smp_send_stop(). */
native_machine_crash_shutdown(regs); native_machine_crash_shutdown(regs);
/* Disable the hypercall page when there is only 1 active CPU. */
hyperv_cleanup();
} }
#endif /* CONFIG_KEXEC_CORE */ #endif /* CONFIG_KEXEC_CORE */
#endif /* CONFIG_HYPERV */ #endif /* CONFIG_HYPERV */
......
...@@ -2550,7 +2550,6 @@ static void hv_kexec_handler(void) ...@@ -2550,7 +2550,6 @@ static void hv_kexec_handler(void)
/* Make sure conn_state is set as hv_synic_cleanup checks for it */ /* Make sure conn_state is set as hv_synic_cleanup checks for it */
mb(); mb();
cpuhp_remove_state(hyperv_cpuhp_online); cpuhp_remove_state(hyperv_cpuhp_online);
hyperv_cleanup();
}; };
static void hv_crash_handler(struct pt_regs *regs) static void hv_crash_handler(struct pt_regs *regs)
...@@ -2566,7 +2565,6 @@ static void hv_crash_handler(struct pt_regs *regs) ...@@ -2566,7 +2565,6 @@ static void hv_crash_handler(struct pt_regs *regs)
cpu = smp_processor_id(); cpu = smp_processor_id();
hv_stimer_cleanup(cpu); hv_stimer_cleanup(cpu);
hv_synic_disable_regs(cpu); hv_synic_disable_regs(cpu);
hyperv_cleanup();
}; };
static int hv_synic_suspend(void) static int hv_synic_suspend(void)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment