Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Thomas Gleixner: "The RAS updates for the 4.13 merge window: - Cleanup of the MCE injection facility (Borislav Petkov) - Rework of the AMD/SMCA handling (Yazen Ghannam) - Enhancements for ACPI/APEI to handle new notification types (Shiju Jose) - atomic_t to refcount_t conversion (Elena Reshetova) - A few fixes and enhancements all over the place" * 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: RAS/CEC: Check the correct variable in the debugfs error handling x86/mce: Always save severity in machine_check_poll() x86/MCE, xen/mcelog: Make /dev/mcelog registration messages more precise x86/mce: Update bootlog description to reflect behavior on AMD x86/mce: Don't disable MCA banks when offlining a CPU on AMD x86/mce/mce-inject: Preset the MCE injection struct x86/mce: Clean up include files x86/mce: Get rid of register_mce_write_callback() x86/mce: Merge mce_amd_inj into mce-inject x86/mce/AMD: Use saved threshold block info in interrupt handler x86/mce/AMD: Use msr_stat when clearing MCA_STATUS x86/mce/AMD: Carve out SMCA bank configuration x86/mce/AMD: Redo error logging from APIC LVT interrupt handlers x86/mce: Convert threshold_bank.cpus from atomic_t to refcount_t RAS: Make local function parse_ras_param() static ACPI/APEI: Handle GSIV and GPIO notification types

Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Thomas Gleixner: "The RAS updates for the 4.13 merge window: - Cleanup of the MCE injection facility (Borislav Petkov) - Rework of the AMD/SMCA handling (Yazen Ghannam) - Enhancements for ACPI/APEI to handle new notification types (Shiju Jose) - atomic_t to refcount_t conversion (Elena Reshetova) - A few fixes and enhancements all over the place" * 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: RAS/CEC: Check the correct variable in the debugfs error handling x86/mce: Always save severity in machine_check_poll() x86/MCE, xen/mcelog: Make /dev/mcelog registration messages more precise x86/mce: Update bootlog description to reflect behavior on AMD x86/mce: Don't disable MCA banks when offlining a CPU on AMD x86/mce/mce-inject: Preset the MCE injection struct x86/mce: Clean up include files x86/mce: Get rid of register_mce_write_callback() x86/mce: Merge mce_amd_inj into mce-inject x86/mce/AMD: Use saved threshold block info in interrupt handler x86/mce/AMD: Use msr_stat when clearing MCA_STATUS x86/mce/AMD: Carve out SMCA bank configuration x86/mce/AMD: Redo error logging from APIC LVT interrupt handlers x86/mce: Convert threshold_bank.cpus from atomic_t to refcount_t RAS: Make local function parse_ras_param() static ACPI/APEI: Handle GSIV and GPIO notification types
4422d80e · Linus Torvalds · 9a9594ef · 32288daf · 4422d80e · 4422d80e
Commit 4422d80e authored Jul 03, 2017 by Linus Torvalds
18 changed files
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -36,7 +36,8 @@ Machine check
 		to broadcast MCEs.
   mce=bootlog
 		Enable logging of machine checks left over from booting.
-		Disabled by default on AMD because some BIOS leave bogus ones.
+		Disabled by default on AMD Fam10h and older because some BIOS
+		leave bogus ones.
 		If your BIOS doesn't do that it's a good idea to enable though
 		to make sure you log even machine check events that result
 		in a reboot. On Intel systems it is enabled by default.

--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1085,7 +1085,7 @@ config X86_MCE_THRESHOLD
 	def_bool y

 config X86_MCE_INJECT
-	depends on X86_MCE && X86_LOCAL_APIC && X86_MCELOG_LEGACY
+	depends on X86_MCE && X86_LOCAL_APIC && DEBUG_FS
 	tristate "Machine check injector support"
 	---help---
 	  Provide support for injecting machine checks for testing purposes.

--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -257,8 +257,6 @@ drivers-$(CONFIG_PM) += arch/x86/power/

 drivers-$(CONFIG_FB) += arch/x86/video/

-drivers-$(CONFIG_RAS) += arch/x86/ras/
-
 ####
 # boot loader support. Several targets are kept for legacy purposes


--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -3,6 +3,7 @@

 #include <linux/ioport.h>
 #include <linux/pci.h>
+#include <linux/refcount.h>

 struct amd_nb_bus_dev_range {
 	u8 bus;
@@ -55,7 +56,7 @@ struct threshold_bank {
 	struct threshold_block	*blocks;

 	/* initialized to the number of CPUs on the node sharing this bank */
-	atomic_t		cpus;
+	refcount_t		cpus;
 };

 struct amd_northbridge {

--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -285,10 +285,6 @@ int mce_notify_irq(void);

 DECLARE_PER_CPU(struct mce, injectm);

-extern void register_mce_write_callback(ssize_t (*)(struct file *filp,
-				    const char __user *ubuf,
-				    size_t usize, loff_t *off));
-
 /* Disable CMCI/polling for MCA bank claimed by firmware */
 extern void mce_disable_bank(int bank);


--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -907,8 +907,13 @@ static inline int mpx_disable_management(void)
 }
 #endif /* CONFIG_X86_INTEL_MPX */

+#ifdef CONFIG_CPU_SUP_AMD
 extern u16 amd_get_nb_id(int cpu);
 extern u32 amd_get_nodes_per_socket(void);
+#else
+static inline u16 amd_get_nb_id(int cpu)		{ return 0; }
+static inline u32 amd_get_nodes_per_socket(void)	{ return 0; }
+#endif

 static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
 {

--- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
+++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c
@@ -17,6 +17,8 @@

 #include "mce-internal.h"

+static BLOCKING_NOTIFIER_HEAD(mce_injector_chain);
+
 static DEFINE_MUTEX(mce_chrdev_read_mutex);

 static char mce_helper[128];
@@ -345,24 +347,49 @@ static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
 	}
 }

-static ssize_t (*mce_write)(struct file *filp, const char __user *ubuf,
-			    size_t usize, loff_t *off);
+void mce_register_injector_chain(struct notifier_block *nb)
+{
+	blocking_notifier_chain_register(&mce_injector_chain, nb);
+}
+EXPORT_SYMBOL_GPL(mce_register_injector_chain);

-void register_mce_write_callback(ssize_t (*fn)(struct file *filp,
-			     const char __user *ubuf,
-			     size_t usize, loff_t *off))
+void mce_unregister_injector_chain(struct notifier_block *nb)
 {
-	mce_write = fn;
+	blocking_notifier_chain_unregister(&mce_injector_chain, nb);
 }
-EXPORT_SYMBOL_GPL(register_mce_write_callback);
+EXPORT_SYMBOL_GPL(mce_unregister_injector_chain);

 static ssize_t mce_chrdev_write(struct file *filp, const char __user *ubuf,
 				size_t usize, loff_t *off)
 {
-	if (mce_write)
-		return mce_write(filp, ubuf, usize, off);
-	else
+	struct mce m;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	/*
+	 * There are some cases where real MSR reads could slip
+	 * through.
+	 */
+	if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA))
+		return -EIO;
+
+	if ((unsigned long)usize > sizeof(struct mce))
+		usize = sizeof(struct mce);
+	if (copy_from_user(&m, ubuf, usize))
+		return -EFAULT;
+
+	if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
 		return -EINVAL;
+
+	/*
+	 * Need to give user space some time to set everything up,
+	 * so do it a jiffie or two later everywhere.
+	 */
+	schedule_timeout(2);
+
+	blocking_notifier_call_chain(&mce_injector_chain, 0, &m);
+
+	return usize;
 }

 static const struct file_operations mce_chrdev_ops = {
@@ -388,9 +415,15 @@ static __init int dev_mcelog_init_device(void)
 	/* register character device /dev/mcelog */
 	err = misc_register(&mce_chrdev_device);
 	if (err) {
+		if (err == -EBUSY)
+			/* Xen dom0 might have registered the device already. */
+			pr_info("Unable to init device /dev/mcelog, already registered");
+		else
 			pr_err("Unable to init device /dev/mcelog (rc: %d)\n", err);
+
 		return err;
 	}
+
 	mce_register_decode_chain(&dev_mcelog_nb);
 	return 0;
 }

--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -100,7 +100,11 @@ static inline bool mce_cmp(struct mce *m1, struct mce *m2)
 extern struct device_attribute dev_attr_trigger;

 #ifdef CONFIG_X86_MCELOG_LEGACY
-extern void mce_work_trigger(void);
+void mce_work_trigger(void);
+void mce_register_injector_chain(struct notifier_block *nb);
+void mce_unregister_injector_chain(struct notifier_block *nb);
 #else
 static inline void mce_work_trigger(void)	{ }
+static inline void mce_register_injector_chain(struct notifier_block *nb)	{ }
+static inline void mce_unregister_injector_chain(struct notifier_block *nb)	{ }
 #endif
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -673,7 +673,6 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
 	bool error_seen = false;
 	struct mce m;
-	int severity;
 	int i;

 	this_cpu_inc(mce_poll_count);
@@ -710,11 +709,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)

 		mce_read_aux(&m, i);

-		severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
-
-		if (severity == MCE_DEFERRED_SEVERITY && mce_is_memory_error(&m))
-			if (m.status & MCI_STATUS_ADDRV)
-				m.severity = severity;
+		m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);

 		/*
 		 * Don't get the IP here because it's unlikely to
@@ -1550,7 +1545,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 			 */
 			clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
 		}
-		if (c->x86 < 17 && cfg->bootlog < 0) {
+		if (c->x86 < 0x11 && cfg->bootlog < 0) {
 			/*
 			 * Lots of broken BIOS around that don't clear them
 			 * by default and leave crap in there. Don't log:
@@ -1832,7 +1827,8 @@ void mce_disable_bank(int bank)
 * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above)
 *	monarchtimeout is how long to wait for other CPUs on machine
 *	check, or 0 to not wait
- * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
+ * mce=bootlog Log MCEs from before booting. Disabled by default on AMD Fam10h
+	and older.
 * mce=nobootlog Don't log MCEs from before booting.
 * mce=bios_cmci_threshold Don't program the CMCI threshold
 * mce=recovery force enable memcpy_mcsafe()
@@ -1912,12 +1908,13 @@ static void mce_disable_error_reporting(void)
 static void vendor_disable_error_reporting(void)
 {
 	/*
-	 * Don't clear on Intel CPUs. Some of these MSRs are socket-wide.
+	 * Don't clear on Intel or AMD CPUs. Some of these MSRs are socket-wide.
 	 * Disabling them for just a single offlined CPU is bad, since it will
 	 * inhibit reporting for all shared resources on the socket like the
 	 * last level cache (LLC), the integrated memory controller (iMC), etc.
 	 */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
+	    boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
 		return;

 	mce_disable_error_reporting();

--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
--- a/arch/x86/ras/Kconfig
+++ b/arch/x86/ras/Kconfig
-config MCE_AMD_INJ
-	tristate "Simple MCE injection interface for AMD processors"
-	depends on RAS && X86_MCE && DEBUG_FS && AMD_NB
-	default n
-	help
-	  This is a simple debugfs interface to inject MCEs and test different
-	  aspects of the MCE handling code.
-
-	  WARNING: Do not even assume this interface is staying stable!
-
 config RAS_CEC
 	bool "Correctable Errors Collector"
 	depends on X86_MCE && MEMORY_FAILURE && DEBUG_FS
@@ -20,4 +10,3 @@ config RAS_CEC

 	  Bear in mind that this is absolutely useless if your platform doesn't
 	  have ECC DIMMs and doesn't have DRAM ECC checking enabled in the BIOS.
-
--- a/arch/x86/ras/Makefile
+++ b/arch/x86/ras/Makefile
-obj-$(CONFIG_MCE_AMD_INJ)		+= mce_amd_inj.o
-
--- a/arch/x86/ras/mce_amd_inj.c
+++ b/arch/x86/ras/mce_amd_inj.c
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -89,14 +89,14 @@ bool ghes_disable;
 module_param_named(disable, ghes_disable, bool, 0);

 /*
- * All error sources notified with SCI shares one notifier function,
- * so they need to be linked and checked one by one.  This is applied
- * to NMI too.
+ * All error sources notified with HED (Hardware Error Device) share a
+ * single notifier callback, so they need to be linked and checked one
+ * by one. This holds true for NMI too.
 *
 * RCU is used for these lists, so ghes_list_mutex is only used for
 * list changing, not for traversing.
 */
-static LIST_HEAD(ghes_sci);
+static LIST_HEAD(ghes_hed);
 static DEFINE_MUTEX(ghes_list_mutex);

 /*
@@ -702,14 +702,14 @@ static irqreturn_t ghes_irq_func(int irq, void *data)
 	return IRQ_HANDLED;
 }

-static int ghes_notify_sci(struct notifier_block *this,
-				  unsigned long event, void *data)
+static int ghes_notify_hed(struct notifier_block *this, unsigned long event,
+			   void *data)
 {
 	struct ghes *ghes;
 	int ret = NOTIFY_DONE;

 	rcu_read_lock();
-	list_for_each_entry_rcu(ghes, &ghes_sci, list) {
+	list_for_each_entry_rcu(ghes, &ghes_hed, list) {
 		if (!ghes_proc(ghes))
 			ret = NOTIFY_OK;
 	}
@@ -718,8 +718,8 @@ static int ghes_notify_sci(struct notifier_block *this,
 	return ret;
 }

-static struct notifier_block ghes_notifier_sci = {
-	.notifier_call = ghes_notify_sci,
+static struct notifier_block ghes_notifier_hed = {
+	.notifier_call = ghes_notify_hed,
 };

 #ifdef CONFIG_HAVE_ACPI_APEI_NMI
@@ -966,7 +966,10 @@ static int ghes_probe(struct platform_device *ghes_dev)
 	case ACPI_HEST_NOTIFY_POLLED:
 	case ACPI_HEST_NOTIFY_EXTERNAL:
 	case ACPI_HEST_NOTIFY_SCI:
+	case ACPI_HEST_NOTIFY_GSIV:
+	case ACPI_HEST_NOTIFY_GPIO:
 		break;
+
 	case ACPI_HEST_NOTIFY_NMI:
 		if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) {
 			pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n",
@@ -1024,13 +1027,17 @@ static int ghes_probe(struct platform_device *ghes_dev)
 			goto err_edac_unreg;
 		}
 		break;
+
 	case ACPI_HEST_NOTIFY_SCI:
+	case ACPI_HEST_NOTIFY_GSIV:
+	case ACPI_HEST_NOTIFY_GPIO:
 		mutex_lock(&ghes_list_mutex);
-		if (list_empty(&ghes_sci))
-			register_acpi_hed_notifier(&ghes_notifier_sci);
-		list_add_rcu(&ghes->list, &ghes_sci);
+		if (list_empty(&ghes_hed))
+			register_acpi_hed_notifier(&ghes_notifier_hed);
+		list_add_rcu(&ghes->list, &ghes_hed);
 		mutex_unlock(&ghes_list_mutex);
 		break;
+
 	case ACPI_HEST_NOTIFY_NMI:
 		ghes_nmi_add(ghes);
 		break;
@@ -1066,14 +1073,18 @@ static int ghes_remove(struct platform_device *ghes_dev)
 	case ACPI_HEST_NOTIFY_EXTERNAL:
 		free_irq(ghes->irq, ghes);
 		break;
+
 	case ACPI_HEST_NOTIFY_SCI:
+	case ACPI_HEST_NOTIFY_GSIV:
+	case ACPI_HEST_NOTIFY_GPIO:
 		mutex_lock(&ghes_list_mutex);
 		list_del_rcu(&ghes->list);
-		if (list_empty(&ghes_sci))
-			unregister_acpi_hed_notifier(&ghes_notifier_sci);
+		if (list_empty(&ghes_hed))
+			unregister_acpi_hed_notifier(&ghes_notifier_hed);
 		mutex_unlock(&ghes_list_mutex);
 		synchronize_rcu();
 		break;
+
 	case ACPI_HEST_NOTIFY_NMI:
 		ghes_nmi_remove(ghes);
 		break;

--- a/drivers/ras/cec.c
+++ b/drivers/ras/cec.c
@@ -481,7 +481,7 @@ static int __init create_debugfs_nodes(void)

 	count = debugfs_create_file("count_threshold", S_IRUSR | S_IWUSR, d,
 				    &count_threshold, &count_threshold_ops);
-	if (!decay) {
+	if (!count) {
 		pr_warn("Error creating count_threshold debugfs node!\n");
 		goto err;
 	}

--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -29,7 +29,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);


-int __init parse_ras_param(char *str)
+static int __init parse_ras_param(char *str)
 {
 #ifdef CONFIG_RAS_CEC
 	parse_cec_param(str);

--- a/drivers/xen/mcelog.c
+++ b/drivers/xen/mcelog.c
@@ -408,6 +408,8 @@ static int __init xen_late_init_mcelog(void)
 	if (ret)
 		goto deregister;

+	pr_info("/dev/mcelog registered by Xen\n");
+
 	return 0;

 deregister: