Commit 4bd20db2 authored by Linus Torvalds

Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Ingo Molnar:
 "Various x86 MCE fixes and small enhancements"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Make usable address checks Intel-only
  x86/mce: Add the missing memory error check on AMD
  x86/RAS: Remove mce.usable_addr
  x86/mce: Do not enter deferred errors into the generic pool twice
parents 5cb52b5e feab21f8
...@@ -16,7 +16,7 @@ struct mce { ...@@ -16,7 +16,7 @@ struct mce {
__u8 cpuvendor; /* cpu vendor as encoded in system.h */ __u8 cpuvendor; /* cpu vendor as encoded in system.h */
__u8 inject_flags; /* software inject flags */ __u8 inject_flags; /* software inject flags */
__u8 severity; __u8 severity;
__u8 usable_addr; __u8 pad;
__u32 cpuid; /* CPUID 1 EAX */ __u32 cpuid; /* CPUID 1 EAX */
__u8 cs; /* code segment */ __u8 cs; /* code segment */
__u8 bank; /* machine check bank */ __u8 bank; /* machine check bank */
......
...@@ -114,7 +114,6 @@ static struct work_struct mce_work; ...@@ -114,7 +114,6 @@ static struct work_struct mce_work;
static struct irq_work mce_irq_work; static struct irq_work mce_irq_work;
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
static int mce_usable_address(struct mce *m);
/* /*
* CPU/chipset specific EDAC code can register a notifier call here to print * CPU/chipset specific EDAC code can register a notifier call here to print
...@@ -475,6 +474,28 @@ static void mce_report_event(struct pt_regs *regs) ...@@ -475,6 +474,28 @@ static void mce_report_event(struct pt_regs *regs)
irq_work_queue(&mce_irq_work); irq_work_queue(&mce_irq_work);
} }
/*
 * Tell whether the address the CPU reported with this error is in a form
 * we are able to parse. Most other formats could be supported as well, but
 * each of them would be rather involved (a segment offset, for example,
 * would need an instruction parser). For now only physical addresses with
 * at most page granularity are accepted.
 *
 * Returns 1 if the address is usable, 0 otherwise. On non-Intel vendors
 * only the MCI_STATUS_MISCV and MCI_STATUS_ADDRV status bits are checked.
 */
static int mce_usable_address(struct mce *m)
{
	const int have_misc = !!(m->status & MCI_STATUS_MISCV);
	const int have_addr = !!(m->status & MCI_STATUS_ADDRV);

	/* Both status bits must be set before anything else is looked at. */
	if (!have_misc || !have_addr)
		return 0;

	/* Checks after this one are Intel-specific: */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
		return 1;

	/* Usable only if the LSB fits in a page and the mode is physical. */
	return MCI_MISC_ADDR_LSB(m->misc) <= PAGE_SHIFT &&
	       MCI_MISC_ADDR_MODE(m->misc) == MCI_MISC_ADDR_PHYS;
}
static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
void *data) void *data)
{ {
...@@ -484,7 +505,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, ...@@ -484,7 +505,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
if (!mce) if (!mce)
return NOTIFY_DONE; return NOTIFY_DONE;
if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) { if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
pfn = mce->addr >> PAGE_SHIFT; pfn = mce->addr >> PAGE_SHIFT;
memory_failure(pfn, MCE_VECTOR, 0); memory_failure(pfn, MCE_VECTOR, 0);
} }
...@@ -522,10 +543,10 @@ static bool memory_error(struct mce *m) ...@@ -522,10 +543,10 @@ static bool memory_error(struct mce *m)
struct cpuinfo_x86 *c = &boot_cpu_data; struct cpuinfo_x86 *c = &boot_cpu_data;
if (c->x86_vendor == X86_VENDOR_AMD) { if (c->x86_vendor == X86_VENDOR_AMD) {
/* /* ErrCodeExt[20:16] */
* coming soon u8 xec = (m->status >> 16) & 0x1f;
*/
return false; return (xec == 0x0 || xec == 0x8);
} else if (c->x86_vendor == X86_VENDOR_INTEL) { } else if (c->x86_vendor == X86_VENDOR_INTEL) {
/* /*
* Intel SDM Volume 3B - 15.9.2 Compound Error Codes * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
...@@ -567,7 +588,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count); ...@@ -567,7 +588,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
*/ */
bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{ {
bool error_logged = false; bool error_seen = false;
struct mce m; struct mce m;
int severity; int severity;
int i; int i;
...@@ -601,6 +622,8 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) ...@@ -601,6 +622,8 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
(m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC)))
continue; continue;
error_seen = true;
mce_read_aux(&m, i); mce_read_aux(&m, i);
if (!(flags & MCP_TIMESTAMP)) if (!(flags & MCP_TIMESTAMP))
...@@ -608,27 +631,24 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) ...@@ -608,27 +631,24 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
/* if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m))
* In the cases where we don't have a valid address after all, if (m.status & MCI_STATUS_ADDRV)
* do not add it into the ring buffer.
*/
if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
if (m.status & MCI_STATUS_ADDRV) {
m.severity = severity; m.severity = severity;
m.usable_addr = mce_usable_address(&m);
if (!mce_gen_pool_add(&m))
mce_schedule_work();
}
}
/* /*
* Don't get the IP here because it's unlikely to * Don't get the IP here because it's unlikely to
* have anything to do with the actual error location. * have anything to do with the actual error location.
*/ */
if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce) { if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
error_logged = true;
mce_log(&m); mce_log(&m);
else if (mce_usable_address(&m)) {
/*
* Although we skipped logging this, we still want
* to take action. Add to the pool so the registered
* notifiers will see it.
*/
if (!mce_gen_pool_add(&m))
mce_schedule_work();
} }
/* /*
...@@ -644,7 +664,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) ...@@ -644,7 +664,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
sync_core(); sync_core();
return error_logged; return error_seen;
} }
EXPORT_SYMBOL_GPL(machine_check_poll); EXPORT_SYMBOL_GPL(machine_check_poll);
...@@ -931,23 +951,6 @@ static int mce_end(int order) ...@@ -931,23 +951,6 @@ static int mce_end(int order)
return ret; return ret;
} }
/*
 * Tell whether the address the CPU reported with this error is in a form
 * we are able to parse. Most other formats could be supported as well, but
 * each of them would be rather involved (a segment offset, for example,
 * would need an instruction parser). For now only physical addresses with
 * at most page granularity are accepted.
 *
 * Returns 1 if the address is usable, 0 otherwise.
 */
static int mce_usable_address(struct mce *m)
{
	/* MCI_STATUS_MISCV and MCI_STATUS_ADDRV must both be set in status. */
	if (!(m->status & MCI_STATUS_MISCV))
		return 0;
	if (!(m->status & MCI_STATUS_ADDRV))
		return 0;

	/* Reject non-physical address modes and an LSB beyond a page. */
	if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS ||
	    MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
		return 0;

	return 1;
}
static void mce_clear_state(unsigned long *toclear) static void mce_clear_state(unsigned long *toclear)
{ {
int i; int i;
...@@ -1100,7 +1103,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) ...@@ -1100,7 +1103,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
/* assuming valid severity level != 0 */ /* assuming valid severity level != 0 */
m.severity = severity; m.severity = severity;
m.usable_addr = mce_usable_address(&m);
mce_log(&m); mce_log(&m);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment