Commit ca1b6692 authored by Linus Torvalds

Merge tag 'ras_updates_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Borislav Petkov:

 - Extend MCE recovery in kernel space to also cover processes which
   encounter an MCE while copying from user memory, by sending them a
   SIGBUS on return to user space and unmapping the faulty memory, by
   Tony Luck and Youquan Song.

 - memcpy_mcsafe() rework, splitting the functionality into
   copy_mc_to_user() and copy_mc_to_kernel(). As a result, this enables
   support for new hardware which can recover from a machine check
   encountered during a fast string copy and makes that the default,
   while letting older hardware which does not support that advanced
   recovery opt in to the old, fragile, slow variant, by Dan Williams.
   (A usage sketch follows after this list.)

 - New AMD hw enablement, by Yazen Ghannam and Akshay Gupta.

 - Do not use MSR-tracing accessors in #MC context and flag any fault
   while accessing MCA architectural MSRs as an architectural violation,
   in the hope that such hw/fw misdesigns are caught early during the
   hw eval phase and do not make it into production.

 - Misc fixes, improvements and cleanups, as always.
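
   For context, a minimal sketch of how a caller (for example a
   pmem/DAX-style driver) is expected to use the new interface. The
   helper name and arguments are hypothetical; only copy_mc_to_kernel()
   and its return convention (0 on success, number of bytes not copied
   when a machine check is hit) come from the diff below, and the
   declaration is assumed to be reachable via <linux/uaccess.h>:

	#include <linux/errno.h>
	#include <linux/types.h>
	#include <linux/uaccess.h>	/* copy_mc_to_kernel() */

	/* Hypothetical helper: copy 'len' bytes from possibly-poisoned media. */
	static int example_read_from_media(void *dst, const void *media_src,
					   size_t len)
	{
		unsigned long rem;

		/*
		 * Unlike memcpy(), copy_mc_to_kernel() reports how many
		 * bytes were left uncopied when a machine check was
		 * encountered, instead of taking the machine down.
		 */
		rem = copy_mc_to_kernel(dst, media_src, len);
		if (rem)
			return -EIO;	/* poison consumed; surface an I/O error */

		return 0;
	}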

* tag 'ras_updates_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Allow for copy_mc_fragile symbol checksum to be generated
  x86/mce: Decode a kernel instruction to determine if it is copying from user
  x86/mce: Recover from poison found while copying from user space
  x86/mce: Avoid tail copy when machine check terminated a copy from user
  x86/mce: Add _ASM_EXTABLE_CPY for copy user access
  x86/mce: Provide method to find out the type of an exception handler
  x86/mce: Pass pointer to saved pt_regs to severity calculation routines
  x86/copy_mc: Introduce copy_mc_enhanced_fast_string()
  x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}()
  x86/mce: Drop AMD-specific "DEFERRED" case from Intel severity rule list
  x86/mce: Add Skylake quirk for patrol scrub reported errors
  RAS/CEC: Convert to DEFINE_SHOW_ATTRIBUTE()
  x86/mce: Annotate mce_rd/wrmsrl() with noinstr
  x86/mce/dev-mcelog: Do not update kflags on AMD systems
  x86/mce: Stop mce_reign() from re-computing severity for every CPU
  x86/mce: Make mce_rdmsrl() panic on an inaccessible MSR
  x86/mce: Increase maximum number of banks to 64
  x86/mce: Delay clearing IA32_MCG_STATUS to the end of do_machine_check()
  x86/MCE/AMD, EDAC/mce_amd: Remove struct smca_hwid.xec_bitmap
  RAS/CEC: Fix cec_init() prototype
parents a9a4b7d9 b3149ffc
......@@ -135,7 +135,7 @@ config PPC
select ARCH_HAS_STRICT_KERNEL_RWX if (PPC32 && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
select ARCH_HAS_UACCESS_MCSAFE if PPC64
select ARCH_HAS_COPY_MC if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK
......
......@@ -53,9 +53,7 @@ void *__memmove(void *to, const void *from, __kernel_size_t n);
#ifndef CONFIG_KASAN
#define __HAVE_ARCH_MEMSET32
#define __HAVE_ARCH_MEMSET64
#define __HAVE_ARCH_MEMCPY_MCSAFE
extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz);
extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
......
......@@ -435,6 +435,32 @@ do { \
extern unsigned long __copy_tofrom_user(void __user *to,
const void __user *from, unsigned long size);
#ifdef CONFIG_ARCH_HAS_COPY_MC
unsigned long __must_check
copy_mc_generic(void *to, const void *from, unsigned long size);
static inline unsigned long __must_check
copy_mc_to_kernel(void *to, const void *from, unsigned long size)
{
return copy_mc_generic(to, from, size);
}
#define copy_mc_to_kernel copy_mc_to_kernel
static inline unsigned long __must_check
copy_mc_to_user(void __user *to, const void *from, unsigned long n)
{
if (likely(check_copy_size(from, n, true))) {
if (access_ok(to, n)) {
allow_write_to_user(to, n);
n = copy_mc_generic((void *)to, from, n);
prevent_write_to_user(to, n);
}
}
return n;
}
#endif
#ifdef __powerpc64__
static inline unsigned long
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
......@@ -523,20 +549,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
return ret;
}
static __always_inline unsigned long __must_check
copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
{
if (likely(check_copy_size(from, n, true))) {
if (access_ok(to, n)) {
allow_write_to_user(to, n);
n = memcpy_mcsafe((void *)to, from, n);
prevent_write_to_user(to, n);
}
}
return n;
}
unsigned long __arch_clear_user(void __user *addr, unsigned long size);
static inline unsigned long clear_user(void __user *addr, unsigned long size)
......
......@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
memcpy_power7.o
obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
memcpy_64.o memcpy_mcsafe_64.o
memcpy_64.o copy_mc_64.o
ifndef CONFIG_PPC_QUEUED_SPINLOCKS
obj64-$(CONFIG_SMP) += locks.o
......
......@@ -50,7 +50,7 @@ err3; stb r0,0(r3)
blr
_GLOBAL(memcpy_mcsafe)
_GLOBAL(copy_mc_generic)
mr r7,r5
cmpldi r5,16
blt .Lshort_copy
......@@ -239,4 +239,4 @@ err1; stb r0,0(r3)
15: li r3,0
blr
EXPORT_SYMBOL_GPL(memcpy_mcsafe);
EXPORT_SYMBOL_GPL(copy_mc_generic);
......@@ -75,7 +75,7 @@ config X86
select ARCH_HAS_PTE_DEVMAP if X86_64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE
select ARCH_HAS_COPY_MC if X86_64
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_STRICT_KERNEL_RWX
......
......@@ -62,7 +62,7 @@ config EARLY_PRINTK_USB_XDBC
You should normally say N here, unless you want to debug early
crashes or need a very simple printk logging facility.
config MCSAFE_TEST
config COPY_MC_TEST
def_bool n
config EFI_PGT_DUMP
......
......@@ -5,6 +5,7 @@
#include <asm/string.h>
#include <asm/page.h>
#include <asm/checksum.h>
#include <asm/mce.h>
#include <asm-generic/asm-prototypes.h>
......
......@@ -135,6 +135,9 @@
# define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
# define _ASM_EXTABLE_CPY(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
......@@ -160,6 +163,9 @@
# define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
# define _ASM_EXTABLE_CPY(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _COPY_MC_TEST_H_
#define _COPY_MC_TEST_H_
#ifndef __ASSEMBLY__
#ifdef CONFIG_COPY_MC_TEST
extern unsigned long copy_mc_test_src;
extern unsigned long copy_mc_test_dst;
static inline void copy_mc_inject_src(void *addr)
{
if (addr)
copy_mc_test_src = (unsigned long) addr;
else
copy_mc_test_src = ~0UL;
}
static inline void copy_mc_inject_dst(void *addr)
{
if (addr)
copy_mc_test_dst = (unsigned long) addr;
else
copy_mc_test_dst = ~0UL;
}
#else /* CONFIG_COPY_MC_TEST */
static inline void copy_mc_inject_src(void *addr)
{
}
static inline void copy_mc_inject_dst(void *addr)
{
}
#endif /* CONFIG_COPY_MC_TEST */
#else /* __ASSEMBLY__ */
#include <asm/export.h>
#ifdef CONFIG_COPY_MC_TEST
.macro COPY_MC_TEST_CTL
.pushsection .data
.align 8
.globl copy_mc_test_src
copy_mc_test_src:
.quad 0
EXPORT_SYMBOL_GPL(copy_mc_test_src)
.globl copy_mc_test_dst
copy_mc_test_dst:
.quad 0
EXPORT_SYMBOL_GPL(copy_mc_test_dst)
.popsection
.endm
.macro COPY_MC_TEST_SRC reg count target
leaq \count(\reg), %r9
cmp copy_mc_test_src, %r9
ja \target
.endm
.macro COPY_MC_TEST_DST reg count target
leaq \count(\reg), %r9
cmp copy_mc_test_dst, %r9
ja \target
.endm
#else
.macro COPY_MC_TEST_CTL
.endm
.macro COPY_MC_TEST_SRC reg count target
.endm
.macro COPY_MC_TEST_DST reg count target
.endm
#endif /* CONFIG_COPY_MC_TEST */
#endif /* __ASSEMBLY__ */
#endif /* _COPY_MC_TEST_H_ */
......@@ -29,10 +29,17 @@ struct pt_regs;
(b)->handler = (tmp).handler - (delta); \
} while (0)
enum handler_type {
EX_HANDLER_NONE,
EX_HANDLER_FAULT,
EX_HANDLER_UACCESS,
EX_HANDLER_OTHER
};
extern int fixup_exception(struct pt_regs *regs, int trapnr,
unsigned long error_code, unsigned long fault_addr);
extern int fixup_bug(struct pt_regs *regs, int trapnr);
extern bool ex_has_fault_handler(unsigned long ip);
extern enum handler_type ex_get_fault_handler_type(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
#endif
......@@ -136,8 +136,23 @@
#define MCE_HANDLED_NFIT BIT_ULL(3)
#define MCE_HANDLED_EDAC BIT_ULL(4)
#define MCE_HANDLED_MCELOG BIT_ULL(5)
/*
* Indicates an MCE which has happened in kernel space but from
* which the kernel can recover simply by executing fixup_exception()
* so that an error is returned to the caller of the function that
* hit the machine check.
*/
#define MCE_IN_KERNEL_RECOV BIT_ULL(6)
/*
* Indicates an MCE that happened in kernel space while copying data
* from user. In this case fixup_exception() gets the kernel to the
* error exit for the copy function. Machine check handler can then
* treat it like a fault taken in user mode.
*/
#define MCE_IN_KERNEL_COPYIN BIT_ULL(7)
/*
* This structure contains all data related to the MCE log. Also
* carries a signature to make it easier to find from external
......@@ -174,6 +189,15 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb);
extern int mce_p5_enabled;
#ifdef CONFIG_ARCH_HAS_COPY_MC
extern void enable_copy_mc_fragile(void);
unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt);
#else
static inline void enable_copy_mc_fragile(void)
{
}
#endif
#ifdef CONFIG_X86_MCE
int mcheck_init(void);
void mcheck_cpu_init(struct cpuinfo_x86 *c);
......@@ -200,12 +224,8 @@ void mce_setup(struct mce *m);
void mce_log(struct mce *m);
DECLARE_PER_CPU(struct device *, mce_device);
/*
* Maximum banks number.
* This is the limit of the current register layout on
* Intel CPUs.
*/
#define MAX_NR_BANKS 32
/* Maximum number of MCA banks per CPU. */
#define MAX_NR_BANKS 64
#ifdef CONFIG_X86_MCE_INTEL
void mce_intel_feature_init(struct cpuinfo_x86 *c);
......@@ -328,7 +348,6 @@ enum smca_bank_types {
struct smca_hwid {
unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */
u32 hwid_mcatype; /* (hwid,mcatype) tuple */
u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */
u8 count; /* Number of instances. */
};
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MCSAFE_TEST_H_
#define _MCSAFE_TEST_H_
#ifndef __ASSEMBLY__
#ifdef CONFIG_MCSAFE_TEST
extern unsigned long mcsafe_test_src;
extern unsigned long mcsafe_test_dst;
static inline void mcsafe_inject_src(void *addr)
{
if (addr)
mcsafe_test_src = (unsigned long) addr;
else
mcsafe_test_src = ~0UL;
}
static inline void mcsafe_inject_dst(void *addr)
{
if (addr)
mcsafe_test_dst = (unsigned long) addr;
else
mcsafe_test_dst = ~0UL;
}
#else /* CONFIG_MCSAFE_TEST */
static inline void mcsafe_inject_src(void *addr)
{
}
static inline void mcsafe_inject_dst(void *addr)
{
}
#endif /* CONFIG_MCSAFE_TEST */
#else /* __ASSEMBLY__ */
#include <asm/export.h>
#ifdef CONFIG_MCSAFE_TEST
.macro MCSAFE_TEST_CTL
.pushsection .data
.align 8
.globl mcsafe_test_src
mcsafe_test_src:
.quad 0
EXPORT_SYMBOL_GPL(mcsafe_test_src)
.globl mcsafe_test_dst
mcsafe_test_dst:
.quad 0
EXPORT_SYMBOL_GPL(mcsafe_test_dst)
.popsection
.endm
.macro MCSAFE_TEST_SRC reg count target
leaq \count(\reg), %r9
cmp mcsafe_test_src, %r9
ja \target
.endm
.macro MCSAFE_TEST_DST reg count target
leaq \count(\reg), %r9
cmp mcsafe_test_dst, %r9
ja \target
.endm
#else
.macro MCSAFE_TEST_CTL
.endm
.macro MCSAFE_TEST_SRC reg count target
.endm
.macro MCSAFE_TEST_DST reg count target
.endm
#endif /* CONFIG_MCSAFE_TEST */
#endif /* __ASSEMBLY__ */
#endif /* _MCSAFE_TEST_H_ */
......@@ -82,38 +82,6 @@ int strcmp(const char *cs, const char *ct);
#endif
#define __HAVE_ARCH_MEMCPY_MCSAFE 1
__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
size_t cnt);
DECLARE_STATIC_KEY_FALSE(mcsafe_key);
/**
* memcpy_mcsafe - copy memory with indication if a machine check happened
*
* @dst: destination address
* @src: source address
* @cnt: number of bytes to copy
*
* Low level memory copy function that catches machine checks
* We only call into the "safe" function on systems that can
* actually do machine check recovery. Everyone else can just
* use memcpy().
*
* Return 0 for success, or number of bytes not copied if there was an
* exception.
*/
static __always_inline __must_check unsigned long
memcpy_mcsafe(void *dst, const void *src, size_t cnt)
{
#ifdef CONFIG_X86_MCE
if (static_branch_unlikely(&mcsafe_key))
return __memcpy_mcsafe(dst, src, cnt);
else
#endif
memcpy(dst, src, cnt);
return 0;
}
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
void __memcpy_flushcache(void *dst, const void *src, size_t cnt);
......
......@@ -35,6 +35,8 @@ extern int panic_on_unrecovered_nmi;
void math_emulate(struct math_emu_info *);
bool fault_in_kernel_space(unsigned long address);
#ifdef CONFIG_VMAP_STACK
void __noreturn handle_stack_overflow(const char *message,
struct pt_regs *regs,
......
......@@ -455,6 +455,15 @@ extern __must_check long strnlen_user(const char __user *str, long n);
unsigned long __must_check clear_user(void __user *mem, unsigned long len);
unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
#ifdef CONFIG_ARCH_HAS_COPY_MC
unsigned long __must_check
copy_mc_to_kernel(void *to, const void *from, unsigned len);
#define copy_mc_to_kernel copy_mc_to_kernel
unsigned long __must_check
copy_mc_to_user(void *to, const void *from, unsigned len);
#endif
/*
* movsl can be slow when source and dest are not both 8-byte aligned
*/
......
......@@ -46,22 +46,6 @@ copy_user_generic(void *to, const void *from, unsigned len)
return ret;
}
static __always_inline __must_check unsigned long
copy_to_user_mcsafe(void *to, const void *from, unsigned len)
{
unsigned long ret;
__uaccess_begin();
/*
* Note, __memcpy_mcsafe() is explicitly used since it can
* handle exceptions / faults. memcpy_mcsafe() may fall back to
* memcpy() which lacks this handling.
*/
ret = __memcpy_mcsafe(to, from, len);
__uaccess_end();
return ret;
}
static __always_inline __must_check unsigned long
raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
{
......@@ -102,8 +86,4 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
kasan_check_write(dst, size);
return __copy_user_flushcache(dst, src, size);
}
unsigned long
mcsafe_handle_tail(char *to, char *from, unsigned len);
#endif /* _ASM_X86_UACCESS_64_H */
......@@ -132,49 +132,49 @@ static enum smca_bank_types smca_get_bank_type(unsigned int bank)
}
static struct smca_hwid smca_hwid_mcatypes[] = {
/* { bank_type, hwid_mcatype, xec_bitmap } */
/* { bank_type, hwid_mcatype } */
/* Reserved type */
{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0) },
/* ZN Core (HWID=0xB0) MCA types */
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
{ SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10), 0xFFFFFF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0) },
{ SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10) },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1) },
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2) },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3) },
/* HWID 0xB0 MCATYPE 0x4 is Reserved */
{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0xFFF },
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5) },
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6) },
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7) },
/* Data Fabric MCA types */
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0x1F },
{ SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0) },
{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1) },
{ SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2) },
/* Unified Memory Controller MCA type */
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0xFF },
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0) },
/* Parameter Block MCA type */
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0) },
/* Platform Security Processor MCA type */
{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
{ SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0) },
{ SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1) },
/* System Management Unit MCA type */
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
{ SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1), 0x7FF },
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0) },
{ SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1) },
/* Microprocessor 5 Unit MCA type */
{ SMCA_MP5, HWID_MCATYPE(0x01, 0x2), 0x3FF },
{ SMCA_MP5, HWID_MCATYPE(0x01, 0x2) },
/* Northbridge IO Unit MCA type */
{ SMCA_NBIO, HWID_MCATYPE(0x18, 0x0), 0x1F },
{ SMCA_NBIO, HWID_MCATYPE(0x18, 0x0) },
/* PCI Express Unit MCA type */
{ SMCA_PCIE, HWID_MCATYPE(0x46, 0x0), 0x1F },
{ SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) },
};
struct smca_bank smca_banks[MAX_NR_BANKS];
......
......@@ -40,7 +40,6 @@
#include <linux/debugfs.h>
#include <linux/irq_work.h>
#include <linux/export.h>
#include <linux/jump_label.h>
#include <linux/set_memory.h>
#include <linux/sync_core.h>
#include <linux/task_work.h>
......@@ -373,42 +372,105 @@ static int msr_to_offset(u32 msr)
return -1;
}
__visible bool ex_handler_rdmsr_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
pr_emerg("MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, regs->ip, (void *)regs->ip);
show_stack_regs(regs);
panic("MCA architectural violation!\n");
while (true)
cpu_relax();
return true;
}
/* MSR access wrappers used for error injection */
static u64 mce_rdmsrl(u32 msr)
static noinstr u64 mce_rdmsrl(u32 msr)
{
u64 v;
DECLARE_ARGS(val, low, high);
if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
int offset;
u64 ret;
instrumentation_begin();
offset = msr_to_offset(msr);
if (offset < 0)
return 0;
return *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
}
ret = 0;
else
ret = *(u64 *)((char *)this_cpu_ptr(&injectm) + offset);
if (rdmsrl_safe(msr, &v)) {
WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr);
/*
* Return zero in case the access faulted. This should
* not happen normally but can happen if the CPU does
* something weird, or if the code is buggy.
*/
v = 0;
instrumentation_end();
return ret;
}
return v;
/*
* RDMSR on MCA MSRs should not fault. If they do, this is very much an
* architectural violation and needs to be reported to hw vendor. Panic
* the box to not allow any further progress.
*/
asm volatile("1: rdmsr\n"
"2:\n"
_ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_fault)
: EAX_EDX_RET(val, low, high) : "c" (msr));
return EAX_EDX_VAL(val, low, high);
}
__visible bool ex_handler_wrmsr_fault(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
unsigned long fault_addr)
{
pr_emerg("MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, (unsigned int)regs->dx, (unsigned int)regs->ax,
regs->ip, (void *)regs->ip);
show_stack_regs(regs);
panic("MCA architectural violation!\n");
while (true)
cpu_relax();
return true;
}
static void mce_wrmsrl(u32 msr, u64 v)
static noinstr void mce_wrmsrl(u32 msr, u64 v)
{
u32 low, high;
if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
int offset;
instrumentation_begin();
offset = msr_to_offset(msr);
if (offset >= 0)
*(u64 *)((char *)this_cpu_ptr(&injectm) + offset) = v;
instrumentation_end();
return;
}
wrmsrl(msr, v);
low = (u32)v;
high = (u32)(v >> 32);
/* See comment in mce_rdmsrl() */
asm volatile("1: wrmsr\n"
"2:\n"
_ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_fault)
: : "c" (msr), "a"(low), "d" (high) : "memory");
}
/*
......@@ -745,7 +807,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
goto clear_it;
mce_read_aux(&m, i);
m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
m.severity = mce_severity(&m, NULL, mca_cfg.tolerant, NULL, false);
/*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
......@@ -794,7 +856,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
quirk_no_way_out(i, m, regs);
m->bank = i;
if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
if (mce_severity(m, regs, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
mce_read_aux(m, i);
*msg = tmp;
return 1;
......@@ -872,7 +934,6 @@ static void mce_reign(void)
struct mce *m = NULL;
int global_worst = 0;
char *msg = NULL;
char *nmsg = NULL;
/*
* This CPU is the Monarch and the other CPUs have run
......@@ -880,12 +941,10 @@ static void mce_reign(void)
* Grade the severity of the errors of all the CPUs.
*/
for_each_possible_cpu(cpu) {
int severity = mce_severity(&per_cpu(mces_seen, cpu),
mca_cfg.tolerant,
&nmsg, true);
if (severity > global_worst) {
msg = nmsg;
global_worst = severity;
struct mce *mtmp = &per_cpu(mces_seen, cpu);
if (mtmp->severity > global_worst) {
global_worst = mtmp->severity;
m = &per_cpu(mces_seen, cpu);
}
}
......@@ -895,8 +954,11 @@ static void mce_reign(void)
* This dumps all the mces in the log buffer and stops the
* other CPUs.
*/
if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
/* call mce_severity() to get "msg" for panic */
mce_severity(m, NULL, mca_cfg.tolerant, &msg, true);
mce_panic("Fatal machine check", m, msg);
}
/*
* For UC somewhere we let the CPU who detects it handle it.
......@@ -1105,7 +1167,7 @@ static noinstr bool mce_check_crashing_cpu(void)
return false;
}
static void __mc_scan_banks(struct mce *m, struct mce *final,
static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
unsigned long *toclear, unsigned long *valid_banks,
int no_way_out, int *worst)
{
......@@ -1140,7 +1202,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
/* Set taint even when machine check was not enabled. */
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
severity = mce_severity(m, cfg->tolerant, NULL, true);
severity = mce_severity(m, regs, cfg->tolerant, NULL, true);
/*
* When machine check was for corrected/deferred handler don't
......@@ -1188,13 +1250,34 @@ static void kill_me_maybe(struct callback_head *cb)
if (!p->mce_ripv)
flags |= MF_MUST_KILL;
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
!(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
sync_core();
return;
}
pr_err("Memory error not recovered");
kill_me_now(cb);
if (p->mce_vaddr != (void __user *)-1l) {
force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
} else {
pr_err("Memory error not recovered");
kill_me_now(cb);
}
}
static void queue_task_work(struct mce *m, int kill_it)
{
current->mce_addr = m->addr;
current->mce_kflags = m->kflags;
current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
current->mce_whole_page = whole_page(m);
if (kill_it)
current->mce_kill_me.func = kill_me_now;
else
current->mce_kill_me.func = kill_me_maybe;
task_work_add(current, &current->mce_kill_me, true);
}
/*
......@@ -1291,7 +1374,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
order = mce_start(&no_way_out);
}
__mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst);
__mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);
if (!no_way_out)
mce_clear_state(toclear);
......@@ -1313,7 +1396,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
* make sure we have the right "msg".
*/
if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
mce_severity(&m, cfg->tolerant, &msg, true);
mce_severity(&m, regs, cfg->tolerant, &msg, true);
mce_panic("Local fatal machine check!", &m, msg);
}
}
......@@ -1330,25 +1413,16 @@ noinstr void do_machine_check(struct pt_regs *regs)
if (worst > 0)
irq_work_queue(&mce_irq_work);
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
sync_core();
if (worst != MCE_AR_SEVERITY && !kill_it)
return;
goto out;
/* Fault was in user mode and we need to take some action */
if ((m.cs & 3) == 3) {
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));
current->mce_addr = m.addr;
current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV);
current->mce_whole_page = whole_page(&m);
current->mce_kill_me.func = kill_me_maybe;
if (kill_it)
current->mce_kill_me.func = kill_me_now;
task_work_add(current, &current->mce_kill_me, true);
queue_task_work(&m, kill_it);
} else {
/*
* Handle an MCE which has happened in kernel space but from
......@@ -1363,7 +1437,12 @@ noinstr void do_machine_check(struct pt_regs *regs)
if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
mce_panic("Failed kernel mode recovery", &m, msg);
}
if (m.kflags & MCE_IN_KERNEL_COPYIN)
queue_task_work(&m, kill_it);
}
out:
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
}
EXPORT_SYMBOL_GPL(do_machine_check);
......@@ -2064,7 +2143,7 @@ void mce_disable_bank(int bank)
and older.
* mce=nobootlog Don't log MCEs from before booting.
* mce=bios_cmci_threshold Don't program the CMCI threshold
* mce=recovery force enable memcpy_mcsafe()
* mce=recovery force enable copy_mc_fragile()
*/
static int __init mcheck_enable(char *str)
{
......@@ -2672,13 +2751,10 @@ static void __init mcheck_debugfs_init(void)
static void __init mcheck_debugfs_init(void) { }
#endif
DEFINE_STATIC_KEY_FALSE(mcsafe_key);
EXPORT_SYMBOL_GPL(mcsafe_key);
static int __init mcheck_late_init(void)
{
if (mca_cfg.recovery)
static_branch_inc(&mcsafe_key);
enable_copy_mc_fragile();
mcheck_debugfs_init();
......
......@@ -67,7 +67,9 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val,
unlock:
mutex_unlock(&mce_chrdev_read_mutex);
mce->kflags |= MCE_HANDLED_MCELOG;
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
mce->kflags |= MCE_HANDLED_MCELOG;
return NOTIFY_OK;
}
......