Commit 11868a2d authored by Ingo Molnar

x86: mce: Use safer ways to access MCE registers

Use rdmsrl_safe() when accessing MCE registers. While in
theory we always 'know' which ones are safe to access from
the capability bits, there are a lot of hardware variations
and reality might differ from theory, as it did in this case:

   http://bugzilla.kernel.org/show_bug.cgi?id=14204

[    0.010016] mce: CPU supports 5 MCE banks
[    0.011029] general protection fault: 0000 [#1]
[    0.011998] last sysfs file:
[    0.011998] Modules linked in:
[    0.011998]
[    0.011998] Pid: 0, comm: swapper Not tainted (2.6.31_router #1) HP Vectra
[    0.011998] EIP: 0060:[<c100d9b9>] EFLAGS: 00010246 CPU: 0
[    0.011998] EIP is at mce_rdmsrl+0x19/0x60
[    0.011998] EAX: 00000000 EBX: 00000001 ECX: 00000407 EDX: 08000000
[    0.011998] ESI: 00000000 EDI: 8c000000 EBP: 00000405 ESP: c17d5eac

So WARN_ONCE() instead of crashing the box.

( also fix a number of stylistic inconsistencies in the code. )

Note, we might still crash in wrmsrl() if we get that far, but
we shouldn't if the registers are truly inaccessible.
Reported-by: GNUtoo <GNUtoo@no-log.org>
Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
LKML-Reference: <bug-14204-5438@http.bugzilla.kernel.org/>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 14c93e8e
...@@ -305,13 +305,25 @@ static int msr_to_offset(u32 msr) ...@@ -305,13 +305,25 @@ static int msr_to_offset(u32 msr)
static u64 mce_rdmsrl(u32 msr) static u64 mce_rdmsrl(u32 msr)
{ {
u64 v; u64 v;
if (__get_cpu_var(injectm).finished) { if (__get_cpu_var(injectm).finished) {
int offset = msr_to_offset(msr); int offset = msr_to_offset(msr);
if (offset < 0) if (offset < 0)
return 0; return 0;
return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
} }
rdmsrl(msr, v);
if (rdmsrl_safe(msr, &v)) {
WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr);
/*
* Return zero in case the access faulted. This should
* not happen normally but can happen if the CPU does
* something weird, or if the code is buggy.
*/
v = 0;
}
return v; return v;
} }
...@@ -319,6 +331,7 @@ static void mce_wrmsrl(u32 msr, u64 v) ...@@ -319,6 +331,7 @@ static void mce_wrmsrl(u32 msr, u64 v)
{ {
if (__get_cpu_var(injectm).finished) { if (__get_cpu_var(injectm).finished) {
int offset = msr_to_offset(msr); int offset = msr_to_offset(msr);
if (offset >= 0) if (offset >= 0)
*(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
return; return;
...@@ -415,7 +428,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) ...@@ -415,7 +428,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
m->ip = mce_rdmsrl(rip_msr); m->ip = mce_rdmsrl(rip_msr);
} }
#ifdef CONFIG_X86_LOCAL_APIC #ifdef CONFIG_X86_LOCAL_APIC
/* /*
* Called after interrupts have been reenabled again * Called after interrupts have been reenabled again
* when a MCE happened during an interrupts off region * when a MCE happened during an interrupts off region
...@@ -1172,6 +1185,7 @@ static int mce_banks_init(void) ...@@ -1172,6 +1185,7 @@ static int mce_banks_init(void)
return -ENOMEM; return -ENOMEM;
for (i = 0; i < banks; i++) { for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i]; struct mce_bank *b = &mce_banks[i];
b->ctl = -1ULL; b->ctl = -1ULL;
b->init = 1; b->init = 1;
} }
...@@ -1203,6 +1217,7 @@ static int __cpuinit mce_cap_init(void) ...@@ -1203,6 +1217,7 @@ static int __cpuinit mce_cap_init(void)
banks = b; banks = b;
if (!mce_banks) { if (!mce_banks) {
int err = mce_banks_init(); int err = mce_banks_init();
if (err) if (err)
return err; return err;
} }
...@@ -1237,6 +1252,7 @@ static void mce_init(void) ...@@ -1237,6 +1252,7 @@ static void mce_init(void)
for (i = 0; i < banks; i++) { for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i]; struct mce_bank *b = &mce_banks[i];
if (!b->init) if (!b->init)
continue; continue;
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
...@@ -1626,6 +1642,7 @@ static int mce_disable(void) ...@@ -1626,6 +1642,7 @@ static int mce_disable(void)
for (i = 0; i < banks; i++) { for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i]; struct mce_bank *b = &mce_banks[i];
if (b->init) if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), 0); wrmsrl(MSR_IA32_MCx_CTL(i), 0);
} }
...@@ -1911,6 +1928,7 @@ static void mce_disable_cpu(void *h) ...@@ -1911,6 +1928,7 @@ static void mce_disable_cpu(void *h)
cmci_clear(); cmci_clear();
for (i = 0; i < banks; i++) { for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i]; struct mce_bank *b = &mce_banks[i];
if (b->init) if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), 0); wrmsrl(MSR_IA32_MCx_CTL(i), 0);
} }
...@@ -1928,6 +1946,7 @@ static void mce_reenable_cpu(void *h) ...@@ -1928,6 +1946,7 @@ static void mce_reenable_cpu(void *h)
cmci_reenable(); cmci_reenable();
for (i = 0; i < banks; i++) { for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i]; struct mce_bank *b = &mce_banks[i];
if (b->init) if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment