Commit ed7290d0 authored by Andi Kleen, committed by H. Peter Anvin

x86, mce: implement new status bits

The x86 architecture recently added two new machine check status bits:
S (Signalled) and AR (Action Required). The S bit lets the kernel check
whether a specific event caused an exception or was merely logged through
CMCI. The AR bit lets the kernel decide whether an event needs immediate
action or can be delayed or ignored.
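
As a rough illustration, here is a stand-alone user-space sketch (not kernel
code) of how the UC, S and AR bits combine into the usual error classes
(UCNA, SRAO, SRAR). The S and AR positions match the header change below;
the UC position (bit 61) is taken from the existing MCi_STATUS layout and is
assumed here for completeness.

#include <stdio.h>
#include <stdint.h>

#define MCI_STATUS_UC (1ULL << 61)	/* uncorrected error */
#define MCI_STATUS_S  (1ULL << 56)	/* signalled via #MC exception */
#define MCI_STATUS_AR (1ULL << 55)	/* immediate action required */

/* Classify one MCi_STATUS value into the error classes the new
 * severity table distinguishes. */
static const char *classify(uint64_t status)
{
	if (!(status & MCI_STATUS_UC))
		return "corrected (CMCI/poll only)";
	if (!(status & MCI_STATUS_S))
		return "UCNA: uncorrected, not signalled, no action required";
	if (status & MCI_STATUS_AR)
		return "SRAR: signalled, action required";
	return "SRAO: signalled, action optional";
}

int main(void)
{
	uint64_t samples[] = {
		0,
		MCI_STATUS_UC,
		MCI_STATUS_UC | MCI_STATUS_S,
		MCI_STATUS_UC | MCI_STATUS_S | MCI_STATUS_AR,
	};
	unsigned i;

	for (i = 0; i < sizeof(samples)/sizeof(samples[0]); i++)
		printf("%#llx -> %s\n",
		       (unsigned long long)samples[i], classify(samples[i]));
	return 0;
}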

Implement support for these new status bits. mce_severity() uses the
new bits to grade the machine check correctly and decide what to do.
The exception handler uses AR to decide whether to kill the affected
process. The S bit is used to separate events between the poll/CMCI
handler and the exception handler.
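
A minimal sketch of that split, assuming MCG_CAP.SER_P has been detected at
init and cached in a flag (mirroring the mce_ser checks added further down;
the helper names here are purely illustrative):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MCI_STATUS_UC (1ULL << 61)
#define MCI_STATUS_S  (1ULL << 56)
#define MCI_STATUS_AR (1ULL << 55)

static bool mce_ser = true;	/* assume MCG_CAP.SER_P was found at init */

/* Poll/CMCI path: leave signalled (or, without SER, uncorrected)
 * events to the exception handler. */
static bool poll_leaves_alone(uint64_t status)
{
	return status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC);
}

/* Exception path: a signalled, action-required event means the
 * affected task has to be killed (unless tolerance is very high). */
static bool action_required(uint64_t status)
{
	uint64_t sar = MCI_STATUS_UC | MCI_STATUS_S | MCI_STATUS_AR;

	return (status & sar) == sar;
}

int main(void)
{
	uint64_t srar = MCI_STATUS_UC | MCI_STATUS_S | MCI_STATUS_AR;

	printf("poll leaves alone: %d, action required: %d\n",
	       poll_leaves_alone(srar), action_required(srar));
	return 0;
}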

Classical UC always leads to a panic. That was effectively true before
anyway, because existing CPUs always set PCC along with it.

Also correct the rules for whether to kill in user or kernel context
and for how to handle a missing RIPV.

The machine check handler now largely uses the mce_severity() grading
engine instead of making its own decisions, so the logic is centralized
in one place. This is useful because the severity has to be evaluated
multiple times.
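
A condensed, stand-alone sketch of that table-driven grading (first matching
mask/result rule wins); only a few illustrative rules are shown here, not the
full table from the diff below:

#include <stdio.h>
#include <stdint.h>

#define MCI_STATUS_VAL (1ULL << 63)
#define MCI_STATUS_UC  (1ULL << 61)
#define MCI_STATUS_PCC (1ULL << 57)

enum severity_level { NO, SOME, UC, PANIC };

static const struct rule {
	uint64_t mask;
	uint64_t result;
	enum severity_level sev;
	const char *msg;
} rules[] = {
	{ MCI_STATUS_VAL, 0, NO, "Invalid" },
	{ MCI_STATUS_PCC, MCI_STATUS_PCC, PANIC, "Processor context corrupt" },
	{ MCI_STATUS_UC, MCI_STATUS_UC, UC, "Uncorrected" },
	{ 0, 0, SOME, "No match" },	/* always matches, keep last */
};

/* Walk the table in order; the first rule whose masked status equals
 * its expected result decides the severity. */
static enum severity_level grade(uint64_t status, const char **msg)
{
	const struct rule *r;

	for (r = rules; ; r++) {
		if ((status & r->mask) != r->result)
			continue;
		*msg = r->msg;
		return r->sev;
	}
}

int main(void)
{
	const char *msg;
	enum severity_level sev = grade(MCI_STATUS_VAL | MCI_STATUS_UC, &msg);

	printf("severity %d: %s\n", sev, msg);
	return 0;
}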

v2: Some rule fixes; add AO events
    Fix RIPV, RIPV|EIPV order (Ying Huang)
    Fix UCNA with AR=1 message (Ying Huang)
    Add comment about panicking in machine_check_poll()
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
parent 86503560
@@ -15,6 +15,7 @@
 #define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
 #define MCG_EXT_CNT_SHIFT 16
 #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
+#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
 
 #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
 #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
@@ -27,6 +28,15 @@
 #define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */
 #define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */
 #define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
+#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
+#define MCI_STATUS_AR (1ULL<<55) /* Action required */
+
+/* MISC register defines */
+#define MCM_ADDR_SEGOFF 0 /* segment offset */
+#define MCM_ADDR_LINEAR 1 /* linear address */
+#define MCM_ADDR_PHYS 2 /* physical address */
+#define MCM_ADDR_MEM 3 /* memory address */
+#define MCM_ADDR_GENERIC 7 /* generic */
 
 /* Fields are zero when not available */
 struct mce {
...
@@ -2,9 +2,14 @@
 enum severity_level {
 	MCE_NO_SEVERITY,
+	MCE_KEEP_SEVERITY,
 	MCE_SOME_SEVERITY,
+	MCE_AO_SEVERITY,
 	MCE_UC_SEVERITY,
+	MCE_AR_SEVERITY,
 	MCE_PANIC_SEVERITY,
 };
 
 int mce_severity(struct mce *a, int tolerant, char **msg);
+
+extern int mce_ser;
@@ -19,43 +19,117 @@
  * first. Since there are quite a lot of combinations test the bits in a
  * table-driven way. The rules are simply processed in order, first
  * match wins.
+ *
+ * Note this is only used for machine check exceptions; the corrected
+ * errors use much simpler rules. The exception handler still checks for
+ * corrected errors, but only to leave them alone for the CMCI handler
+ * (except in panic situations).
  */
+
+enum context { IN_KERNEL = 1, IN_USER = 2 };
+enum ser { SER_REQUIRED = 1, NO_SER = 2 };
+
 static struct severity {
 	u64 mask;
 	u64 result;
 	unsigned char sev;
 	unsigned char mcgmask;
 	unsigned char mcgres;
+	unsigned char ser;
+	unsigned char context;
 	char *msg;
 } severities[] = {
+#define KERNEL .context = IN_KERNEL
+#define USER .context = IN_USER
+#define SER .ser = SER_REQUIRED
+#define NOSER .ser = NO_SER
 #define SEV(s) .sev = MCE_ ## s ## _SEVERITY
 #define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r }
 #define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r }
 #define MCGMASK(x, res, s, m, r...) \
 	{ .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r }
+#define MASK(x, y, s, m, r...) \
+	{ .mask = x, .result = y, SEV(s), .msg = m, ## r }
+#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
+#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
+#define MCACOD 0xffff
+
 	BITCLR(MCI_STATUS_VAL, NO, "Invalid"),
 	BITCLR(MCI_STATUS_EN, NO, "Not enabled"),
 	BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"),
-	MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "No restart IP"),
+	/* When MCIP is not set something is very confused */
+	MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"),
+	/* Neither return nor error IP -- no chance to recover -> PANIC */
+	MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC,
+		"Neither restart nor error IP"),
+	MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
+		KERNEL),
+	BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
+	MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME,
+	     "Spurious not enabled", SER),
+
+	/* ignore OVER for UCNA */
+	MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP,
+	     "Uncorrected no action required", SER),
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC,
+	     "Illegal combination (UCNA with AR=1)", SER),
+	MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER),
+
+	/* AR add known MCACODs here */
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
+	     "Action required with lost events", SER),
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC,
+	     "Action required; unknown MCACOD", SER),
+
+	/* known AO MCACODs: */
+	MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO,
+	     "Action optional: memory scrubbing error", SER),
+	MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO,
+	     "Action optional: last level cache writeback error", SER),
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME,
+	     "Action optional unknown MCACOD", SER),
+	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME,
+	     "Action optional with lost events", SER),
 	BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"),
 	BITSET(MCI_STATUS_UC, UC, "Uncorrected"),
 	BITSET(0, SOME, "No match") /* always matches. keep at end */
 };
 
+/*
+ * If the EIPV bit is set, it means the saved IP is the
+ * instruction which caused the MCE.
+ */
+static int error_context(struct mce *m)
+{
+	if (m->mcgstatus & MCG_STATUS_EIPV)
+		return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
+	/* Unknown, assume kernel */
+	return IN_KERNEL;
+}
+
 int mce_severity(struct mce *a, int tolerant, char **msg)
 {
+	enum context ctx = error_context(a);
 	struct severity *s;
 
 	for (s = severities;; s++) {
 		if ((a->status & s->mask) != s->result)
 			continue;
 		if ((a->mcgstatus & s->mcgmask) != s->mcgres)
 			continue;
-		if (s->sev > MCE_NO_SEVERITY && (a->status & MCI_STATUS_UC) &&
-		    tolerant < 1)
-			return MCE_PANIC_SEVERITY;
+		if (s->ser == SER_REQUIRED && !mce_ser)
+			continue;
+		if (s->ser == NO_SER && mce_ser)
+			continue;
+		if (s->context && ctx != s->context)
+			continue;
 		if (msg)
 			*msg = s->msg;
+		if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
+			if (panic_on_oops || tolerant < 1)
+				return MCE_PANIC_SEVERITY;
+		}
 		return s->sev;
 	}
 }
@@ -83,6 +83,7 @@ static int rip_msr;
 static int mce_bootlog = -1;
 static int monarch_timeout = -1;
 static int mce_panic_timeout;
+int mce_ser;
 
 static char trigger[128];
 static char *trigger_argv[2] = { trigger, NULL };
@@ -391,6 +392,15 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
  * Those are just logged through /dev/mcelog.
  *
  * This is executed in standard interrupt context.
+ *
+ * Note: spec recommends to panic for fatal unsignalled
+ * errors here. However this would be quite problematic --
+ * we would need to reimplement the Monarch handling and
+ * it would mess up the exclusion between exception handler
+ * and poll handler -- so we skip this for now.
+ * These cases should not happen anyway, or only when the CPU
+ * is already totally confused. In this case it's likely it will
+ * not fully execute the machine check handler either.
  */
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
@@ -417,13 +427,13 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 			continue;
 
 		/*
-		 * Uncorrected events are handled by the exception handler
-		 * when it is enabled. But when the exception is disabled log
-		 * everything.
+		 * Uncorrected or signalled events are handled by the exception
+		 * handler when it is enabled, so don't process those here.
 		 *
 		 * TBD do the same check for MCI_STATUS_EN here?
 		 */
-		if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC))
+		if (!(flags & MCP_UC) &&
+		    (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
 			continue;
 
 		if (m.status & MCI_STATUS_MISCV)
@@ -789,6 +799,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	barrier();
 
+	/*
+	 * When there is no restart IP we must always kill or panic.
+	 */
+	if (!(m.mcgstatus & MCG_STATUS_RIPV))
+		kill_it = 1;
+
 	/*
 	 * Go through all the banks in exclusion of the other CPUs.
 	 * This way we don't report duplicated events on shared banks
@@ -809,10 +825,11 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 			continue;
 
 		/*
-		 * Non uncorrected errors are handled by machine_check_poll
-		 * Leave them alone, unless this panics.
+		 * Non uncorrected or non signaled errors are handled by
+		 * machine_check_poll. Leave them alone, unless this panics.
 		 */
-		if ((m.status & MCI_STATUS_UC) == 0 && !no_way_out)
+		if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
+		    !no_way_out)
 			continue;
 
 		/*
@@ -820,17 +837,16 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		 */
 		add_taint(TAINT_MACHINE_CHECK);
 
-		__set_bit(i, toclear);
-		if (m.status & MCI_STATUS_EN) {
-			/*
-			 * If this error was uncorrectable and there was
-			 * an overflow, we're in trouble. If no overflow,
-			 * we might get away with just killing a task.
-			 */
-			if (m.status & MCI_STATUS_UC)
-				kill_it = 1;
-		} else {
+		severity = mce_severity(&m, tolerant, NULL);
+		/*
+		 * When the machine check was for the corrected handler,
+		 * don't touch it, unless we're panicking.
+		 */
+		if (severity == MCE_KEEP_SEVERITY && !no_way_out)
+			continue;
+		__set_bit(i, toclear);
+		if (severity == MCE_NO_SEVERITY) {
 			/*
 			 * Machine check event was not enabled. Clear, but
 			 * ignore.
@@ -838,6 +854,12 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 			continue;
 		}
 
+		/*
+		 * Kill on action required.
+		 */
+		if (severity == MCE_AR_SEVERITY)
+			kill_it = 1;
+
 		if (m.status & MCI_STATUS_MISCV)
 			m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
 		if (m.status & MCI_STATUS_ADDRV)
@@ -846,7 +868,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		mce_get_rip(&m, regs);
 		mce_log(&m);
 
-		severity = mce_severity(&m, tolerant, NULL);
 		if (severity > worst) {
 			*final = m;
 			worst = severity;
@@ -879,29 +900,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	 * one task, do that. If the user has set the tolerance very
 	 * high, don't try to do anything at all.
 	 */
-	if (kill_it && tolerant < 3) {
-		int user_space = 0;
-
-		/*
-		 * If the EIPV bit is set, it means the saved IP is the
-		 * instruction which caused the MCE.
-		 */
-		if (m.mcgstatus & MCG_STATUS_EIPV)
-			user_space = final->ip && (final->cs & 3);
-
-		/*
-		 * If we know that the error was in user space, send a
-		 * SIGBUS. Otherwise, panic if tolerance is low.
-		 *
-		 * force_sig() takes an awful lot of locks and has a slight
-		 * risk of deadlocking.
-		 */
-		if (user_space) {
-			force_sig(SIGBUS, current);
-		} else if (panic_on_oops || tolerant < 2) {
-			mce_panic("Uncorrected machine check", final, msg);
-		}
-	}
+
+	if (kill_it && tolerant < 3)
+		force_sig(SIGBUS, current);
 
 	/* notify userspace ASAP */
 	set_thread_flag(TIF_MCE_NOTIFY);
@@ -1049,6 +1050,9 @@ static int mce_cap_init(void)
 	if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
 		rip_msr = MSR_IA32_MCG_EIP;
 
+	if (cap & MCG_SER_P)
+		mce_ser = 1;
+
 	return 0;
 }