Commit f8bc0881, authored by Vishal Verma and committed by Greg Kroah-Hartman

x86/mce: Make the MCE notifier a blocking one

commit 0dc9c639 upstream.

The NFIT MCE handler callback (for handling media errors on NVDIMMs)
takes a mutex to add the location of a memory error to a list. But since
the notifier call chain for machine checks (x86_mce_decoder_chain) is
atomic, we get a lockdep splat like:

  BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620
  in_atomic(): 1, irqs_disabled(): 0, pid: 4, name: kworker/0:0
  [..]
  Call Trace:
   dump_stack
   ___might_sleep
   __might_sleep
   mutex_lock_nested
   ? __lock_acquire
   nfit_handle_mce
   notifier_call_chain
   atomic_notifier_call_chain
   ? atomic_notifier_call_chain
   mce_gen_pool_process

Convert the notifier to a blocking one which gets to run only in process
context.

Boris: remove the notifier call in atomic context in print_mce(). For
now, let's print the MCE on the atomic path so that we can make sure
they go out and get logged at least.

Fixes: 6839a6d9 ("nfit: do an ARS scrub on hitting a latent media error")
Reported-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: x86-ml <x86@kernel.org>
Link: http://lkml.kernel.org/r/20170411224457.24777-1-vishal.l.verma@intel.com
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 6966a657
...@@ -85,7 +85,7 @@ void mce_gen_pool_process(void) ...@@ -85,7 +85,7 @@ void mce_gen_pool_process(void)
head = llist_reverse_order(head); head = llist_reverse_order(head);
llist_for_each_entry_safe(node, tmp, head, llnode) { llist_for_each_entry_safe(node, tmp, head, llnode) {
mce = &node->mce; mce = &node->mce;
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node)); gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
} }
} }
......
...@@ -13,7 +13,7 @@ enum severity_level { ...@@ -13,7 +13,7 @@ enum severity_level {
MCE_PANIC_SEVERITY, MCE_PANIC_SEVERITY,
}; };
extern struct atomic_notifier_head x86_mce_decoder_chain; extern struct blocking_notifier_head x86_mce_decoder_chain;
#define ATTR_LEN 16 #define ATTR_LEN 16
#define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */ #define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */
......
...@@ -120,7 +120,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); ...@@ -120,7 +120,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
* CPU/chipset specific EDAC code can register a notifier call here to print * CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form. * MCE errors in a human-readable form.
*/ */
ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain);
/* Do initial initialization of a struct mce */ /* Do initial initialization of a struct mce */
void mce_setup(struct mce *m) void mce_setup(struct mce *m)
...@@ -213,13 +213,13 @@ void mce_register_decode_chain(struct notifier_block *nb) ...@@ -213,13 +213,13 @@ void mce_register_decode_chain(struct notifier_block *nb)
if (nb != &mce_srao_nb && nb->priority == INT_MAX) if (nb != &mce_srao_nb && nb->priority == INT_MAX)
nb->priority -= 1; nb->priority -= 1;
atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); blocking_notifier_chain_register(&x86_mce_decoder_chain, nb);
} }
EXPORT_SYMBOL_GPL(mce_register_decode_chain); EXPORT_SYMBOL_GPL(mce_register_decode_chain);
void mce_unregister_decode_chain(struct notifier_block *nb) void mce_unregister_decode_chain(struct notifier_block *nb)
{ {
atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); blocking_notifier_chain_unregister(&x86_mce_decoder_chain, nb);
} }
EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
...@@ -272,8 +272,6 @@ struct mca_msr_regs msr_ops = { ...@@ -272,8 +272,6 @@ struct mca_msr_regs msr_ops = {
static void print_mce(struct mce *m) static void print_mce(struct mce *m)
{ {
int ret = 0;
pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
m->extcpu, m->mcgstatus, m->bank, m->status); m->extcpu, m->mcgstatus, m->bank, m->status);
...@@ -309,14 +307,6 @@ static void print_mce(struct mce *m) ...@@ -309,14 +307,6 @@ static void print_mce(struct mce *m)
m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid,
cpu_data(m->extcpu).microcode); cpu_data(m->extcpu).microcode);
/*
* Print out human-readable details about the MCE error,
* (if the CPU has an implementation for that)
*/
ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
if (ret == NOTIFY_STOP)
return;
pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n"); pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment