Commit e13284da authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Borislav Petkov:
 "This time around we have in store:

   - Disable MC4_MISC thresholding banks on all AMD family 0x15 models
     (Shirish S)

   - AMD MCE error descriptions update and error decode improvements
     (Yazen Ghannam)

   - The usual smaller conversions and fixes"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Improve error message when kernel cannot recover, p2
  EDAC/mce_amd: Decode MCA_STATUS in bit definition order
  EDAC/mce_amd: Decode MCA_STATUS[Scrub] bit
  EDAC, mce_amd: Print ExtErrorCode and description on a single line
  EDAC, mce_amd: Match error descriptions to latest documentation
  x86/MCE/AMD, EDAC/mce_amd: Add new error descriptions for some SMCA bank types
  x86/MCE/AMD, EDAC/mce_amd: Add new McaTypes for CS, PSP, and SMU units
  x86/MCE/AMD, EDAC/mce_amd: Add new MP5, NBIO, and PCIE SMCA bank types
  RAS: Add a MAINTAINERS entry
  RAS: Use consistent types for UUIDs
  x86/MCE/AMD: Carve out the MC4_MISC thresholding quirk
  x86/MCE/AMD: Turn off MC4_MISC thresholding on all family 0x15 models
  x86/MCE: Switch to use the new generic UUID API
parents 1b37b8c4 41f035a8
...@@ -12960,6 +12960,16 @@ M: Alexandre Bounine <alex.bou9@gmail.com> ...@@ -12960,6 +12960,16 @@ M: Alexandre Bounine <alex.bou9@gmail.com>
S: Maintained S: Maintained
F: drivers/rapidio/ F: drivers/rapidio/
RAS INFRASTRUCTURE
M: Tony Luck <tony.luck@intel.com>
M: Borislav Petkov <bp@alien8.de>
L: linux-edac@vger.kernel.org
S: Maintained
F: drivers/ras/
F: include/linux/ras.h
F: include/ras/ras_event.h
F: Documentation/admin-guide/ras.rst
RAYLINK/WEBGEAR 802.11 WIRELESS LAN DRIVER RAYLINK/WEBGEAR 802.11 WIRELESS LAN DRIVER
L: linux-wireless@vger.kernel.org L: linux-wireless@vger.kernel.org
S: Orphan S: Orphan
......
...@@ -48,6 +48,7 @@ ...@@ -48,6 +48,7 @@
#define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */ #define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */
#define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */ #define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */
#define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */ #define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */
#define MCI_STATUS_SCRUB BIT_ULL(40) /* Error detected during scrub operation */
/* /*
* McaX field if set indicates a given bank supports MCA extensions: * McaX field if set indicates a given bank supports MCA extensions:
...@@ -307,11 +308,17 @@ enum smca_bank_types { ...@@ -307,11 +308,17 @@ enum smca_bank_types {
SMCA_FP, /* Floating Point */ SMCA_FP, /* Floating Point */
SMCA_L3_CACHE, /* L3 Cache */ SMCA_L3_CACHE, /* L3 Cache */
SMCA_CS, /* Coherent Slave */ SMCA_CS, /* Coherent Slave */
SMCA_CS_V2, /* Coherent Slave */
SMCA_PIE, /* Power, Interrupts, etc. */ SMCA_PIE, /* Power, Interrupts, etc. */
SMCA_UMC, /* Unified Memory Controller */ SMCA_UMC, /* Unified Memory Controller */
SMCA_PB, /* Parameter Block */ SMCA_PB, /* Parameter Block */
SMCA_PSP, /* Platform Security Processor */ SMCA_PSP, /* Platform Security Processor */
SMCA_PSP_V2, /* Platform Security Processor */
SMCA_SMU, /* System Management Unit */ SMCA_SMU, /* System Management Unit */
SMCA_SMU_V2, /* System Management Unit */
SMCA_MP5, /* Microprocessor 5 Unit */
SMCA_NBIO, /* Northbridge IO Unit */
SMCA_PCIE, /* PCI Express Unit */
N_SMCA_BANK_TYPES N_SMCA_BANK_TYPES
}; };
......
...@@ -88,11 +88,17 @@ static struct smca_bank_name smca_names[] = { ...@@ -88,11 +88,17 @@ static struct smca_bank_name smca_names[] = {
[SMCA_FP] = { "floating_point", "Floating Point Unit" }, [SMCA_FP] = { "floating_point", "Floating Point Unit" },
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
[SMCA_CS] = { "coherent_slave", "Coherent Slave" }, [SMCA_CS] = { "coherent_slave", "Coherent Slave" },
[SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" },
[SMCA_PIE] = { "pie", "Power, Interrupts, etc." }, [SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
[SMCA_UMC] = { "umc", "Unified Memory Controller" }, [SMCA_UMC] = { "umc", "Unified Memory Controller" },
[SMCA_PB] = { "param_block", "Parameter Block" }, [SMCA_PB] = { "param_block", "Parameter Block" },
[SMCA_PSP] = { "psp", "Platform Security Processor" }, [SMCA_PSP] = { "psp", "Platform Security Processor" },
[SMCA_PSP_V2] = { "psp", "Platform Security Processor" },
[SMCA_SMU] = { "smu", "System Management Unit" }, [SMCA_SMU] = { "smu", "System Management Unit" },
[SMCA_SMU_V2] = { "smu", "System Management Unit" },
[SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" },
[SMCA_NBIO] = { "nbio", "Northbridge IO Unit" },
[SMCA_PCIE] = { "pcie", "PCI Express Unit" },
}; };
static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init = static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
...@@ -138,30 +144,42 @@ static struct smca_hwid smca_hwid_mcatypes[] = { ...@@ -138,30 +144,42 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 }, { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
/* ZN Core (HWID=0xB0) MCA types */ /* ZN Core (HWID=0xB0) MCA types */
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF }, { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF }, { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF }, { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF }, { SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
/* HWID 0xB0 MCATYPE 0x4 is Reserved */ /* HWID 0xB0 MCATYPE 0x4 is Reserved */
{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF }, { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0xFFF },
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F }, { SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF }, { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
/* Data Fabric MCA types */ /* Data Fabric MCA types */
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF }, { SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF }, { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0x1F },
{ SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
/* Unified Memory Controller MCA type */ /* Unified Memory Controller MCA type */
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F }, { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0xFF },
/* Parameter Block MCA type */ /* Parameter Block MCA type */
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 }, { SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
/* Platform Security Processor MCA type */ /* Platform Security Processor MCA type */
{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 }, { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
{ SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
/* System Management Unit MCA type */ /* System Management Unit MCA type */
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 }, { SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
{ SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1), 0x7FF },
/* Microprocessor 5 Unit MCA type */
{ SMCA_MP5, HWID_MCATYPE(0x01, 0x2), 0x3FF },
/* Northbridge IO Unit MCA type */
{ SMCA_NBIO, HWID_MCATYPE(0x18, 0x0), 0x1F },
/* PCI Express Unit MCA type */
{ SMCA_PCIE, HWID_MCATYPE(0x46, 0x0), 0x1F },
}; };
struct smca_bank smca_banks[MAX_NR_BANKS]; struct smca_bank smca_banks[MAX_NR_BANKS];
...@@ -545,6 +563,40 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, ...@@ -545,6 +563,40 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
return offset; return offset;
} }
/*
* Turn off MC4_MISC thresholding banks on all family 0x15 models since
* they're not supported there.
*/
void disable_err_thresholding(struct cpuinfo_x86 *c)
{
int i;
u64 hwcr;
bool need_toggle;
u32 msrs[] = {
0x00000413, /* MC4_MISC0 */
0xc0000408, /* MC4_MISC1 */
};
if (c->x86 != 0x15)
return;
rdmsrl(MSR_K7_HWCR, hwcr);
/* McStatusWrEn has to be set */
need_toggle = !(hwcr & BIT(18));
if (need_toggle)
wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
/* Clear CntP bit safely */
for (i = 0; i < ARRAY_SIZE(msrs); i++)
msr_clear_bit(msrs[i], 62);
/* restore old settings */
if (need_toggle)
wrmsrl(MSR_K7_HWCR, hwcr);
}
/* cpu init entry point, called from mce.c with preempt off */ /* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c) void mce_amd_feature_init(struct cpuinfo_x86 *c)
{ {
...@@ -552,6 +604,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) ...@@ -552,6 +604,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
unsigned int bank, block, cpu = smp_processor_id(); unsigned int bank, block, cpu = smp_processor_id();
int offset = -1; int offset = -1;
disable_err_thresholding(c);
for (bank = 0; bank < mca_cfg.banks; ++bank) { for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (mce_flags.smca) if (mce_flags.smca)
smca_configure(bank, cpu); smca_configure(bank, cpu);
......
...@@ -64,10 +64,10 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) ...@@ -64,10 +64,10 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
#define CPER_CREATOR_MCE \ #define CPER_CREATOR_MCE \
UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
0x64, 0x90, 0xb8, 0x9d) 0x64, 0x90, 0xb8, 0x9d)
#define CPER_SECTION_TYPE_MCE \ #define CPER_SECTION_TYPE_MCE \
UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ GUID_INIT(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
0x04, 0x4a, 0x38, 0xfc) 0x04, 0x4a, 0x38, 0xfc)
/* /*
...@@ -135,7 +135,7 @@ ssize_t apei_read_mce(struct mce *m, u64 *record_id) ...@@ -135,7 +135,7 @@ ssize_t apei_read_mce(struct mce *m, u64 *record_id)
goto out; goto out;
/* try to skip other type records in storage */ /* try to skip other type records in storage */
else if (rc != sizeof(rcd) || else if (rc != sizeof(rcd) ||
uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) !guid_equal(&rcd.hdr.creator_id, &CPER_CREATOR_MCE))
goto retry; goto retry;
memcpy(m, &rcd.mce, sizeof(*m)); memcpy(m, &rcd.mce, sizeof(*m));
rc = sizeof(*m); rc = sizeof(*m);
......
...@@ -1612,36 +1612,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) ...@@ -1612,36 +1612,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 0x15 && c->x86_model <= 0xf) if (c->x86 == 0x15 && c->x86_model <= 0xf)
mce_flags.overflow_recov = 1; mce_flags.overflow_recov = 1;
/*
* Turn off MC4_MISC thresholding banks on those models since
* they're not supported there.
*/
if (c->x86 == 0x15 &&
(c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
int i;
u64 hwcr;
bool need_toggle;
u32 msrs[] = {
0x00000413, /* MC4_MISC0 */
0xc0000408, /* MC4_MISC1 */
};
rdmsrl(MSR_K7_HWCR, hwcr);
/* McStatusWrEn has to be set */
need_toggle = !(hwcr & BIT(18));
if (need_toggle)
wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
/* Clear CntP bit safely */
for (i = 0; i < ARRAY_SIZE(msrs); i++)
msr_clear_bit(msrs[i], 62);
/* restore old settings */
if (need_toggle)
wrmsrl(MSR_K7_HWCR, hwcr);
}
} }
if (c->x86_vendor == X86_VENDOR_INTEL) { if (c->x86_vendor == X86_VENDOR_INTEL) {
......
...@@ -165,6 +165,11 @@ static struct severity { ...@@ -165,6 +165,11 @@ static struct severity {
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
KERNEL KERNEL
), ),
MCESEV(
PANIC, "Instruction fetch error in kernel",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
KERNEL
),
#endif #endif
MCESEV( MCESEV(
PANIC, "Action required: unknown MCACOD", PANIC, "Action required: unknown MCACOD",
......
This diff is collapsed.
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
#define TRACE_INCLUDE_PATH ../../include/ras #define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h> #include <ras/ras_event.h>
void log_non_standard_event(const uuid_le *sec_type, const uuid_le *fru_id, void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id,
const char *fru_text, const u8 sev, const u8 *err, const char *fru_text, const u8 sev, const u8 *err,
const u32 len) const u32 len)
{ {
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
TRACE_EVENT(extlog_mem_event, TRACE_EVENT(extlog_mem_event,
TP_PROTO(struct cper_sec_mem_err *mem, TP_PROTO(struct cper_sec_mem_err *mem,
u32 err_seq, u32 err_seq,
const uuid_le *fru_id, const guid_t *fru_id,
const char *fru_text, const char *fru_text,
u8 sev), u8 sev),
...@@ -39,7 +39,7 @@ TRACE_EVENT(extlog_mem_event, ...@@ -39,7 +39,7 @@ TRACE_EVENT(extlog_mem_event,
__field(u8, sev) __field(u8, sev)
__field(u64, pa) __field(u64, pa)
__field(u8, pa_mask_lsb) __field(u8, pa_mask_lsb)
__field_struct(uuid_le, fru_id) __field_struct(guid_t, fru_id)
__string(fru_text, fru_text) __string(fru_text, fru_text)
__field_struct(struct cper_mem_err_compact, data) __field_struct(struct cper_mem_err_compact, data)
), ),
...@@ -218,8 +218,8 @@ TRACE_EVENT(arm_event, ...@@ -218,8 +218,8 @@ TRACE_EVENT(arm_event,
*/ */
TRACE_EVENT(non_standard_event, TRACE_EVENT(non_standard_event,
TP_PROTO(const uuid_le *sec_type, TP_PROTO(const guid_t *sec_type,
const uuid_le *fru_id, const guid_t *fru_id,
const char *fru_text, const char *fru_text,
const u8 sev, const u8 sev,
const u8 *err, const u8 *err,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment