Commit fbe1bf1e authored by Linus Torvalds

Revert "x86/smp: Put CPUs into INIT on shutdown if possible"

This reverts commit 45e34c8a, and the
two subsequent fixes to it:

  3f874c9b ("x86/smp: Don't send INIT to non-present and non-booted CPUs")
  b1472a60 ("x86/smp: Don't send INIT to boot CPU")

because it seems to result in hung machines at shutdown.  Particularly
some Dell machines, but Thomas says

 "The rest seems to be Lenovo and Sony with Alderlake/Raptorlake CPUs -
  at least that's what I could figure out from the various bug reports.

  I don't know which CPUs the DELL machines have, so I can't say it's a
  pattern.

  I agree with the revert for now"

Ashok Raj chimes in:

 "There was a report (probably this same one), and it turns out it was a
  bug in the BIOS SMI handler.

  The client BIOS's were waiting for the lowest APICID to be the SMI
  rendezvous master. If this is MeteorLake, the BSP wasn't the one with
  the lowest APIC and it tripped here.

  The BIOS change is also being pushed to others for assimilation :)

  Server BIOS's had this correctly for a while now"

and it does look likely to be some bad interaction between SMI and the
non-BSP cores having been put into INIT (and thus unresponsive until reset).
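
To make that failure mode concrete, here is a deliberately crude,
compilable userspace model of the reported BIOS behaviour. It is not
firmware code, and every name in it (apic_id, parked_in_init,
lowest_apicid_cpu) is invented for illustration: the handler elects the
CPU with the lowest APIC ID as rendezvous master, but on a hybrid part
that CPU may be a parked AP rather than the BSP, so the rendezvous can
never complete.

    #include <stdbool.h>
    #include <stdio.h>

    #define NR_CPUS 4

    /* Invented model state: APIC IDs and INIT-parked flags. On a hybrid
     * part the BSP need not have the lowest APIC ID (here the BSP is
     * CPU 0 with APIC ID 4). */
    static const unsigned int apic_id[NR_CPUS] = { 4, 0, 1, 2 };
    static bool parked_in_init[NR_CPUS];

    /* The firmware's (hypothetical) choice of rendezvous master:
     * the CPU with the lowest APIC ID. */
    static int lowest_apicid_cpu(void)
    {
            int master = 0;

            for (int cpu = 1; cpu < NR_CPUS; cpu++)
                    if (apic_id[cpu] < apic_id[master])
                            master = cpu;
            return master;
    }

    int main(void)
    {
            /* Shutdown parks every CPU except the BSP (CPU 0) in INIT. */
            for (int cpu = 1; cpu < NR_CPUS; cpu++)
                    parked_in_init[cpu] = true;

            /* A broadcast SMI arrives. The BSP enters the handler, sees
             * it is not the lowest APIC ID and waits for the master to
             * show up. A CPU held in INIT never enters the handler, so
             * real firmware would spin here forever; the loop is bounded
             * only to let this demo terminate. */
            int master = lowest_apicid_cpu();
            unsigned long timeout = 1000000;

            while (parked_in_init[master] && --timeout)
                    ;       /* rendezvous never completes */

            if (!timeout)
                    puts("hang: SMI rendezvous master is parked in INIT");
            return 0;
    }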

Link: https://bbs.archlinux.org/viewtopic.php?pid=2124429
Link: https://www.reddit.com/r/openSUSE/comments/16qq99b/tumbleweed_shutdown_did_not_finish_completely/
Link: https://forum.artixlinux.org/index.php/topic,5997.0.html
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2241279
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Ashok Raj <ashok.raj@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 5720c43d
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -129,7 +129,6 @@ void native_smp_send_reschedule(int cpu);
 void native_send_call_func_ipi(const struct cpumask *mask);
 void native_send_call_func_single_ipi(int cpu);
-bool smp_park_other_cpus_in_init(void);
 void smp_store_cpu_info(int id);
 
 asmlinkage __visible void smp_reboot_interrupt(void);
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -131,7 +131,7 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
 }
 
 /*
- * Disable virtualization, APIC etc. and park the CPU in a HLT loop
+ * this function calls the 'stop' function on all other CPUs in the system.
  */
 DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
 {
@@ -172,17 +172,13 @@ static void native_stop_other_cpus(int wait)
 	 * 2) Wait for all other CPUs to report that they reached the
 	 *    HLT loop in stop_this_cpu()
 	 *
-	 * 3) If the system uses INIT/STARTUP for CPU bringup, then
-	 *    send all present CPUs an INIT vector, which brings them
-	 *    completely out of the way.
-	 *
-	 * 4) If #3 is not possible and #2 timed out send an NMI to the
-	 *    CPUs which did not yet report
+	 * 3) If #2 timed out send an NMI to the CPUs which did not
+	 *    yet report
 	 *
-	 * 5) Wait for all other CPUs to report that they reached the
+	 * 4) Wait for all other CPUs to report that they reached the
 	 *    HLT loop in stop_this_cpu()
 	 *
-	 * #4 can obviously race against a CPU reaching the HLT loop late.
+	 * #3 can obviously race against a CPU reaching the HLT loop late.
 	 * That CPU will have reported already and the "have all CPUs
 	 * reached HLT" condition will be true despite the fact that the
 	 * other CPU is still handling the NMI. Again, there is no
@@ -198,7 +194,7 @@ static void native_stop_other_cpus(int wait)
 		/*
 		 * Don't wait longer than a second for IPI completion. The
 		 * wait request is not checked here because that would
-		 * prevent an NMI/INIT shutdown in case that not all
+		 * prevent an NMI shutdown attempt in case that not all
 		 * CPUs reach shutdown state.
 		 */
 		timeout = USEC_PER_SEC;
@@ -206,27 +202,7 @@ static void native_stop_other_cpus(int wait)
 		udelay(1);
 	}
 
-	/*
-	 * Park all other CPUs in INIT including "offline" CPUs, if
-	 * possible. That's a safe place where they can't resume execution
-	 * of HLT and then execute the HLT loop from overwritten text or
-	 * page tables.
-	 *
-	 * The only downside is a broadcast MCE, but up to the point where
-	 * the kexec() kernel brought all APs online again an MCE will just
-	 * make HLT resume and handle the MCE. The machine crashes and burns
-	 * due to overwritten text, page tables and data. So there is a
-	 * choice between fire and frying pan. The result is pretty much
-	 * the same. Chose frying pan until x86 provides a sane mechanism
-	 * to park a CPU.
-	 */
-	if (smp_park_other_cpus_in_init())
-		goto done;
-
-	/*
-	 * If park with INIT was not possible and the REBOOT_VECTOR didn't
-	 * take all secondary CPUs offline, try with the NMI.
-	 */
+	/* if the REBOOT_VECTOR didn't work, try with the NMI */
 	if (!cpumask_empty(&cpus_stop_mask)) {
 		/*
 		 * If NMI IPI is enabled, try to register the stop handler
@@ -249,7 +225,6 @@ static void native_stop_other_cpus(int wait)
 		udelay(1);
 	}
 
-done:
 	local_irq_save(flags);
 	disable_local_APIC();
 	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
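
As a reading aid for the restored logic above, the following is a
compressed, compilable userspace sketch of the stop sequence the comment
in native_stop_other_cpus() describes. The helpers (send_reboot_ipi,
send_nmi, reached_hlt) are invented stand-ins, not the kernel's API; the
real code manipulates cpus_stop_mask and delivers actual IPIs/NMIs.

    #include <stdbool.h>
    #include <stdio.h>

    #define NR_CPUS 4
    #define USEC_PER_SEC 1000000UL

    static bool reached_hlt[NR_CPUS];   /* stand-in for cpus_stop_mask */

    /* The REBOOT vector IPI stops CPUs that still have interrupts
     * enabled; CPU 3 models one that has them disabled and misses it. */
    static void send_reboot_ipi(void)
    {
            for (int cpu = 1; cpu < NR_CPUS; cpu++)
                    if (cpu != 3)
                            reached_hlt[cpu] = true;
    }

    /* An NMI is delivered even with interrupts disabled. */
    static void send_nmi(int cpu)
    {
            reached_hlt[cpu] = true;
    }

    static bool all_stopped(void)
    {
            for (int cpu = 1; cpu < NR_CPUS; cpu++)
                    if (!reached_hlt[cpu])
                            return false;
            return true;
    }

    int main(void)
    {
            unsigned long timeout;

            send_reboot_ipi();                      /* step 1 */
            for (timeout = USEC_PER_SEC; timeout && !all_stopped(); timeout--)
                    ;                               /* step 2: bounded wait */

            for (int cpu = 1; cpu < NR_CPUS; cpu++) /* step 3: NMI only */
                    if (!reached_hlt[cpu])          /* the stragglers */
                            send_nmi(cpu);

            for (timeout = USEC_PER_SEC; timeout && !all_stopped(); timeout--)
                    ;                               /* step 4: wait again */

            printf("all other CPUs stopped: %s\n",
                   all_stopped() ? "yes" : "no");
            return 0;
    }

The race noted in the comment is benign in this structure: a CPU that
reaches HLT late via the REBOOT IPI has already reported, so the step 4
wait still terminates correctly.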
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1240,33 +1240,6 @@ void arch_thaw_secondary_cpus_end(void)
 	cache_aps_init();
 }
 
-bool smp_park_other_cpus_in_init(void)
-{
-	unsigned int cpu, this_cpu = smp_processor_id();
-	unsigned int apicid;
-
-	if (apic->wakeup_secondary_cpu_64 || apic->wakeup_secondary_cpu)
-		return false;
-
-	/*
-	 * If this is a crash stop which does not execute on the boot CPU,
-	 * then this cannot use the INIT mechanism because INIT to the boot
-	 * CPU will reset the machine.
-	 */
-	if (this_cpu)
-		return false;
-
-	for_each_cpu_and(cpu, &cpus_booted_once_mask, cpu_present_mask) {
-		if (cpu == this_cpu)
-			continue;
-
-		apicid = apic->cpu_present_to_apicid(cpu);
-		if (apicid == BAD_APICID)
-			continue;
-
-		send_init_sequence(apicid);
-	}
-	return true;
-}
-
 /*
  * Early setup to make printk work.
  */