Commit 30f5a756 authored by Linus Torvalds

Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Borislav Petkov:

 - Misc fixes to the MCE code all over the place, by Jan H. Schönherr.

 - Initial support for AMD F19h and other cleanups to amd64_edac, by
   Yazen Ghannam.

 - Other small cleanups.

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  EDAC/mce_amd: Make fam_ops static global
  EDAC/amd64: Drop some family checks for newer systems
  EDAC/amd64: Add family ops for Family 19h Models 00h-0Fh
  x86/amd_nb: Add Family 19h PCI IDs
  EDAC/mce_amd: Always load on SMCA systems
  x86/MCE/AMD, EDAC/mce_amd: Add new Load Store unit McaType
  x86/mce: Fix use of uninitialized MCE message string
  x86/mce: Fix mce=nobootlog
  x86/mce: Take action on UCNA/Deferred errors again
  x86/mce: Remove mce_inject_log() in favor of mce_log()
  x86/mce: Pass MCE message to mce_panic() on failed kernel recovery
  x86/mce/therm_throt: Mark throttle_active_work() as __maybe_unused
parents b62061b8 86e9f9d6
...@@ -144,7 +144,7 @@ struct mce_log_buffer { ...@@ -144,7 +144,7 @@ struct mce_log_buffer {
enum mce_notifier_prios { enum mce_notifier_prios {
MCE_PRIO_FIRST = INT_MAX, MCE_PRIO_FIRST = INT_MAX,
MCE_PRIO_SRAO = INT_MAX - 1, MCE_PRIO_UC = INT_MAX - 1,
MCE_PRIO_EXTLOG = INT_MAX - 2, MCE_PRIO_EXTLOG = INT_MAX - 2,
MCE_PRIO_NFIT = INT_MAX - 3, MCE_PRIO_NFIT = INT_MAX - 3,
MCE_PRIO_EDAC = INT_MAX - 4, MCE_PRIO_EDAC = INT_MAX - 4,
...@@ -290,6 +290,7 @@ extern void apei_mce_report_mem_error(int corrected, ...@@ -290,6 +290,7 @@ extern void apei_mce_report_mem_error(int corrected,
/* These may be used by multiple smca_hwid_mcatypes */ /* These may be used by multiple smca_hwid_mcatypes */
enum smca_bank_types { enum smca_bank_types {
SMCA_LS = 0, /* Load Store */ SMCA_LS = 0, /* Load Store */
SMCA_LS_V2, /* Load Store */
SMCA_IF, /* Instruction Fetch */ SMCA_IF, /* Instruction Fetch */
SMCA_L2_CACHE, /* L2 Cache */ SMCA_L2_CACHE, /* L2 Cache */
SMCA_DE, /* Decoder Unit */ SMCA_DE, /* Decoder Unit */
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494 #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444
#define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654
/* Protect the PCI config register pairs used for SMN and DF indirect access. */ /* Protect the PCI config register pairs used for SMN and DF indirect access. */
static DEFINE_MUTEX(smn_mutex); static DEFINE_MUTEX(smn_mutex);
...@@ -52,6 +53,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { ...@@ -52,6 +53,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) },
{} {}
}; };
EXPORT_SYMBOL_GPL(amd_nb_misc_ids); EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
...@@ -66,6 +68,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { ...@@ -66,6 +68,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
{} {}
}; };
......
...@@ -78,6 +78,7 @@ struct smca_bank_name { ...@@ -78,6 +78,7 @@ struct smca_bank_name {
static struct smca_bank_name smca_names[] = { static struct smca_bank_name smca_names[] = {
[SMCA_LS] = { "load_store", "Load Store Unit" }, [SMCA_LS] = { "load_store", "Load Store Unit" },
[SMCA_LS_V2] = { "load_store", "Load Store Unit" },
[SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" },
[SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" },
[SMCA_DE] = { "decode_unit", "Decode Unit" }, [SMCA_DE] = { "decode_unit", "Decode Unit" },
...@@ -138,6 +139,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = { ...@@ -138,6 +139,7 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
/* ZN Core (HWID=0xB0) MCA types */ /* ZN Core (HWID=0xB0) MCA types */
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF }, { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
{ SMCA_LS_V2, HWID_MCATYPE(0xB0, 0x10), 0xFFFFFF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF }, { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF }, { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF }, { SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
......
...@@ -53,8 +53,6 @@ ...@@ -53,8 +53,6 @@
#include "internal.h" #include "internal.h"
static DEFINE_MUTEX(mce_log_mutex);
/* sysfs synchronization */ /* sysfs synchronization */
static DEFINE_MUTEX(mce_sysfs_mutex); static DEFINE_MUTEX(mce_sysfs_mutex);
...@@ -156,19 +154,10 @@ void mce_log(struct mce *m) ...@@ -156,19 +154,10 @@ void mce_log(struct mce *m)
if (!mce_gen_pool_add(m)) if (!mce_gen_pool_add(m))
irq_work_queue(&mce_irq_work); irq_work_queue(&mce_irq_work);
} }
EXPORT_SYMBOL_GPL(mce_log);
void mce_inject_log(struct mce *m)
{
mutex_lock(&mce_log_mutex);
mce_log(m);
mutex_unlock(&mce_log_mutex);
}
EXPORT_SYMBOL_GPL(mce_inject_log);
static struct notifier_block mce_srao_nb;
/* /*
* We run the default notifier if we have only the SRAO, the first and the * We run the default notifier if we have only the UC, the first and the
* default notifier registered. I.e., the mandatory NUM_DEFAULT_NOTIFIERS * default notifier registered. I.e., the mandatory NUM_DEFAULT_NOTIFIERS
* notifiers registered on the chain. * notifiers registered on the chain.
*/ */
...@@ -594,26 +583,29 @@ static struct notifier_block first_nb = { ...@@ -594,26 +583,29 @@ static struct notifier_block first_nb = {
.priority = MCE_PRIO_FIRST, .priority = MCE_PRIO_FIRST,
}; };
static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
void *data) void *data)
{ {
struct mce *mce = (struct mce *)data; struct mce *mce = (struct mce *)data;
unsigned long pfn; unsigned long pfn;
if (!mce) if (!mce || !mce_usable_address(mce))
return NOTIFY_DONE; return NOTIFY_DONE;
if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) { if (mce->severity != MCE_AO_SEVERITY &&
pfn = mce->addr >> PAGE_SHIFT; mce->severity != MCE_DEFERRED_SEVERITY)
if (!memory_failure(pfn, 0)) return NOTIFY_DONE;
set_mce_nospec(pfn);
} pfn = mce->addr >> PAGE_SHIFT;
if (!memory_failure(pfn, 0))
set_mce_nospec(pfn);
return NOTIFY_OK; return NOTIFY_OK;
} }
static struct notifier_block mce_srao_nb = {
.notifier_call = srao_decode_notifier, static struct notifier_block mce_uc_nb = {
.priority = MCE_PRIO_SRAO, .notifier_call = uc_decode_notifier,
.priority = MCE_PRIO_UC,
}; };
static int mce_default_notifier(struct notifier_block *nb, unsigned long val, static int mce_default_notifier(struct notifier_block *nb, unsigned long val,
...@@ -763,26 +755,22 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) ...@@ -763,26 +755,22 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
log_it: log_it:
error_seen = true; error_seen = true;
mce_read_aux(&m, i); if (flags & MCP_DONTLOG)
goto clear_it;
mce_read_aux(&m, i);
m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
/* /*
* Don't get the IP here because it's unlikely to * Don't get the IP here because it's unlikely to
* have anything to do with the actual error location. * have anything to do with the actual error location.
*/ */
if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
mce_log(&m);
else if (mce_usable_address(&m)) {
/*
* Although we skipped logging this, we still want
* to take action. Add to the pool so the registered
* notifiers will see it.
*/
if (!mce_gen_pool_add(&m))
mce_schedule_work();
}
if (mca_cfg.dont_log_ce && !mce_usable_address(&m))
goto clear_it;
mce_log(&m);
clear_it:
/* /*
* Clear state for this bank. * Clear state for this bank.
*/ */
...@@ -807,7 +795,7 @@ EXPORT_SYMBOL_GPL(machine_check_poll); ...@@ -807,7 +795,7 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
struct pt_regs *regs) struct pt_regs *regs)
{ {
char *tmp; char *tmp = *msg;
int i; int i;
for (i = 0; i < this_cpu_read(mce_num_banks); i++) { for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
...@@ -1232,8 +1220,8 @@ void do_machine_check(struct pt_regs *regs, long error_code) ...@@ -1232,8 +1220,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
DECLARE_BITMAP(toclear, MAX_NR_BANKS); DECLARE_BITMAP(toclear, MAX_NR_BANKS);
struct mca_config *cfg = &mca_cfg; struct mca_config *cfg = &mca_cfg;
int cpu = smp_processor_id(); int cpu = smp_processor_id();
char *msg = "Unknown";
struct mce m, *final; struct mce m, *final;
char *msg = NULL;
int worst = 0; int worst = 0;
/* /*
...@@ -1365,7 +1353,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) ...@@ -1365,7 +1353,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
ist_end_non_atomic(); ist_end_non_atomic();
} else { } else {
if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0)) if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0))
mce_panic("Failed kernel mode recovery", &m, NULL); mce_panic("Failed kernel mode recovery", &m, msg);
} }
out_ist: out_ist:
...@@ -2041,7 +2029,7 @@ int __init mcheck_init(void) ...@@ -2041,7 +2029,7 @@ int __init mcheck_init(void)
{ {
mcheck_intel_therm_init(); mcheck_intel_therm_init();
mce_register_decode_chain(&first_nb); mce_register_decode_chain(&first_nb);
mce_register_decode_chain(&mce_srao_nb); mce_register_decode_chain(&mce_uc_nb);
mce_register_decode_chain(&mce_default_nb); mce_register_decode_chain(&mce_default_nb);
mcheck_vendor_init_severity(); mcheck_vendor_init_severity();
......
...@@ -494,7 +494,7 @@ static void do_inject(void) ...@@ -494,7 +494,7 @@ static void do_inject(void)
i_mce.status |= MCI_STATUS_SYNDV; i_mce.status |= MCI_STATUS_SYNDV;
if (inj_type == SW_INJ) { if (inj_type == SW_INJ) {
mce_inject_log(&i_mce); mce_log(&i_mce);
return; return;
} }
......
...@@ -84,8 +84,6 @@ static inline int apei_clear_mce(u64 record_id) ...@@ -84,8 +84,6 @@ static inline int apei_clear_mce(u64 record_id)
} }
#endif #endif
void mce_inject_log(struct mce *m);
/* /*
* We consider records to be equivalent if bank+status+addr+misc all match. * We consider records to be equivalent if bank+status+addr+misc all match.
* This is only used when the system is going down because of a fatal error * This is only used when the system is going down because of a fatal error
......
...@@ -235,7 +235,7 @@ static void get_therm_status(int level, bool *proc_hot, u8 *temp) ...@@ -235,7 +235,7 @@ static void get_therm_status(int level, bool *proc_hot, u8 *temp)
*temp = (msr_val >> 16) & 0x7F; *temp = (msr_val >> 16) & 0x7F;
} }
static void throttle_active_work(struct work_struct *work) static void __maybe_unused throttle_active_work(struct work_struct *work)
{ {
struct _thermal_state *state = container_of(to_delayed_work(work), struct _thermal_state *state = container_of(to_delayed_work(work),
struct _thermal_state, therm_work); struct _thermal_state, therm_work);
......
...@@ -214,7 +214,7 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate) ...@@ -214,7 +214,7 @@ static int __set_scrub_rate(struct amd64_pvt *pvt, u32 new_bw, u32 min_rate)
scrubval = scrubrates[i].scrubval; scrubval = scrubrates[i].scrubval;
if (pvt->fam == 0x17 || pvt->fam == 0x18) { if (pvt->umc) {
__f17h_set_scrubval(pvt, scrubval); __f17h_set_scrubval(pvt, scrubval);
} else if (pvt->fam == 0x15 && pvt->model == 0x60) { } else if (pvt->fam == 0x15 && pvt->model == 0x60) {
f15h_select_dct(pvt, 0); f15h_select_dct(pvt, 0);
...@@ -256,18 +256,7 @@ static int get_scrub_rate(struct mem_ctl_info *mci) ...@@ -256,18 +256,7 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
int i, retval = -EINVAL; int i, retval = -EINVAL;
u32 scrubval = 0; u32 scrubval = 0;
switch (pvt->fam) { if (pvt->umc) {
case 0x15:
/* Erratum #505 */
if (pvt->model < 0x10)
f15h_select_dct(pvt, 0);
if (pvt->model == 0x60)
amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
break;
case 0x17:
case 0x18:
amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval); amd64_read_pci_cfg(pvt->F6, F17H_SCR_BASE_ADDR, &scrubval);
if (scrubval & BIT(0)) { if (scrubval & BIT(0)) {
amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval); amd64_read_pci_cfg(pvt->F6, F17H_SCR_LIMIT_ADDR, &scrubval);
...@@ -276,11 +265,15 @@ static int get_scrub_rate(struct mem_ctl_info *mci) ...@@ -276,11 +265,15 @@ static int get_scrub_rate(struct mem_ctl_info *mci)
} else { } else {
scrubval = 0; scrubval = 0;
} }
break; } else if (pvt->fam == 0x15) {
/* Erratum #505 */
if (pvt->model < 0x10)
f15h_select_dct(pvt, 0);
default: if (pvt->model == 0x60)
amd64_read_pci_cfg(pvt->F2, F15H_M60H_SCRCTRL, &scrubval);
} else {
amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval); amd64_read_pci_cfg(pvt->F3, SCRCTRL, &scrubval);
break;
} }
scrubval = scrubval & 0x001F; scrubval = scrubval & 0x001F;
...@@ -1055,6 +1048,16 @@ static void determine_memory_type(struct amd64_pvt *pvt) ...@@ -1055,6 +1048,16 @@ static void determine_memory_type(struct amd64_pvt *pvt)
{ {
u32 dram_ctrl, dcsm; u32 dram_ctrl, dcsm;
if (pvt->umc) {
if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
pvt->dram_type = MEM_LRDDR4;
else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
pvt->dram_type = MEM_RDDR4;
else
pvt->dram_type = MEM_DDR4;
return;
}
switch (pvt->fam) { switch (pvt->fam) {
case 0xf: case 0xf:
if (pvt->ext_model >= K8_REV_F) if (pvt->ext_model >= K8_REV_F)
...@@ -1100,16 +1103,6 @@ static void determine_memory_type(struct amd64_pvt *pvt) ...@@ -1100,16 +1103,6 @@ static void determine_memory_type(struct amd64_pvt *pvt)
case 0x16: case 0x16:
goto ddr3; goto ddr3;
case 0x17:
case 0x18:
if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(5))
pvt->dram_type = MEM_LRDDR4;
else if ((pvt->umc[0].dimm_cfg | pvt->umc[1].dimm_cfg) & BIT(4))
pvt->dram_type = MEM_RDDR4;
else
pvt->dram_type = MEM_DDR4;
return;
default: default:
WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam); WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam);
pvt->dram_type = MEM_EMPTY; pvt->dram_type = MEM_EMPTY;
...@@ -2336,6 +2329,16 @@ static struct amd64_family_type family_types[] = { ...@@ -2336,6 +2329,16 @@ static struct amd64_family_type family_types[] = {
.dbam_to_cs = f17_addr_mask_to_cs_size, .dbam_to_cs = f17_addr_mask_to_cs_size,
} }
}, },
[F19_CPUS] = {
.ctl_name = "F19h",
.f0_id = PCI_DEVICE_ID_AMD_19H_DF_F0,
.f6_id = PCI_DEVICE_ID_AMD_19H_DF_F6,
.max_mcs = 8,
.ops = {
.early_channel_count = f17_early_channel_count,
.dbam_to_cs = f17_addr_mask_to_cs_size,
}
},
}; };
/* /*
...@@ -3368,6 +3371,12 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) ...@@ -3368,6 +3371,12 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt)
family_types[F17_CPUS].ctl_name = "F18h"; family_types[F17_CPUS].ctl_name = "F18h";
break; break;
case 0x19:
fam_type = &family_types[F19_CPUS];
pvt->ops = &family_types[F19_CPUS].ops;
family_types[F19_CPUS].ctl_name = "F19h";
break;
default: default:
amd64_err("Unsupported family!\n"); amd64_err("Unsupported family!\n");
return NULL; return NULL;
...@@ -3623,6 +3632,7 @@ static const struct x86_cpu_id amd64_cpuids[] = { ...@@ -3623,6 +3632,7 @@ static const struct x86_cpu_id amd64_cpuids[] = {
{ X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
{ X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { X86_VENDOR_AMD, 0x17, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
{ X86_VENDOR_HYGON, 0x18, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, { X86_VENDOR_HYGON, 0x18, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
{ X86_VENDOR_AMD, 0x19, X86_MODEL_ANY, X86_FEATURE_ANY, 0 },
{ } { }
}; };
MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids); MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);
......
...@@ -122,6 +122,8 @@ ...@@ -122,6 +122,8 @@
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496 #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F6 0x1496
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F0 0x1440 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F0 0x1440
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446
#define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650
#define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656
/* /*
* Function 1 - Address Map * Function 1 - Address Map
...@@ -292,6 +294,7 @@ enum amd_families { ...@@ -292,6 +294,7 @@ enum amd_families {
F17_M10H_CPUS, F17_M10H_CPUS,
F17_M30H_CPUS, F17_M30H_CPUS,
F17_M70H_CPUS, F17_M70H_CPUS,
F19_CPUS,
NUM_FAMILIES, NUM_FAMILIES,
}; };
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include "mce_amd.h" #include "mce_amd.h"
static struct amd_decoder_ops *fam_ops; static struct amd_decoder_ops fam_ops;
static u8 xec_mask = 0xf; static u8 xec_mask = 0xf;
...@@ -175,6 +175,33 @@ static const char * const smca_ls_mce_desc[] = { ...@@ -175,6 +175,33 @@ static const char * const smca_ls_mce_desc[] = {
"L2 Fill Data error", "L2 Fill Data error",
}; };
static const char * const smca_ls2_mce_desc[] = {
"An ECC error was detected on a data cache read by a probe or victimization",
"An ECC error or L2 poison was detected on a data cache read by a load",
"An ECC error was detected on a data cache read-modify-write by a store",
"An ECC error or poison bit mismatch was detected on a tag read by a probe or victimization",
"An ECC error or poison bit mismatch was detected on a tag read by a load",
"An ECC error or poison bit mismatch was detected on a tag read by a store",
"An ECC error was detected on an EMEM read by a load",
"An ECC error was detected on an EMEM read-modify-write by a store",
"A parity error was detected in an L1 TLB entry by any access",
"A parity error was detected in an L2 TLB entry by any access",
"A parity error was detected in a PWC entry by any access",
"A parity error was detected in an STQ entry by any access",
"A parity error was detected in an LDQ entry by any access",
"A parity error was detected in a MAB entry by any access",
"A parity error was detected in an SCB entry state field by any access",
"A parity error was detected in an SCB entry address field by any access",
"A parity error was detected in an SCB entry data field by any access",
"A parity error was detected in a WCB entry by any access",
"A poisoned line was detected in an SCB entry by any access",
"A SystemReadDataError error was reported on read data returned from L2 for a load",
"A SystemReadDataError error was reported on read data returned from L2 for an SCB store",
"A SystemReadDataError error was reported on read data returned from L2 for a WCB store",
"A hardware assertion error was reported",
"A parity error was detected in an STLF, SCB EMEM entry or SRB store data by any access",
};
static const char * const smca_if_mce_desc[] = { static const char * const smca_if_mce_desc[] = {
"Op Cache Microtag Probe Port Parity Error", "Op Cache Microtag Probe Port Parity Error",
"IC Microtag or Full Tag Multi-hit Error", "IC Microtag or Full Tag Multi-hit Error",
...@@ -378,6 +405,7 @@ struct smca_mce_desc { ...@@ -378,6 +405,7 @@ struct smca_mce_desc {
static struct smca_mce_desc smca_mce_descs[] = { static struct smca_mce_desc smca_mce_descs[] = {
[SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) }, [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) },
[SMCA_LS_V2] = { smca_ls2_mce_desc, ARRAY_SIZE(smca_ls2_mce_desc) },
[SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) }, [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) },
[SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) }, [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) },
[SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) }, [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) },
...@@ -555,7 +583,7 @@ static void decode_mc0_mce(struct mce *m) ...@@ -555,7 +583,7 @@ static void decode_mc0_mce(struct mce *m)
: (xec ? "multimatch" : "parity"))); : (xec ? "multimatch" : "parity")));
return; return;
} }
} else if (fam_ops->mc0_mce(ec, xec)) } else if (fam_ops.mc0_mce(ec, xec))
; ;
else else
pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n"); pr_emerg(HW_ERR "Corrupted MC0 MCE info?\n");
...@@ -669,7 +697,7 @@ static void decode_mc1_mce(struct mce *m) ...@@ -669,7 +697,7 @@ static void decode_mc1_mce(struct mce *m)
pr_cont("Hardware Assert.\n"); pr_cont("Hardware Assert.\n");
else else
goto wrong_mc1_mce; goto wrong_mc1_mce;
} else if (fam_ops->mc1_mce(ec, xec)) } else if (fam_ops.mc1_mce(ec, xec))
; ;
else else
goto wrong_mc1_mce; goto wrong_mc1_mce;
...@@ -803,7 +831,7 @@ static void decode_mc2_mce(struct mce *m) ...@@ -803,7 +831,7 @@ static void decode_mc2_mce(struct mce *m)
pr_emerg(HW_ERR "MC2 Error: "); pr_emerg(HW_ERR "MC2 Error: ");
if (!fam_ops->mc2_mce(ec, xec)) if (!fam_ops.mc2_mce(ec, xec))
pr_cont(HW_ERR "Corrupted MC2 MCE info?\n"); pr_cont(HW_ERR "Corrupted MC2 MCE info?\n");
} }
...@@ -1102,7 +1130,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) ...@@ -1102,7 +1130,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
if (m->tsc) if (m->tsc)
pr_emerg(HW_ERR "TSC: %llu\n", m->tsc); pr_emerg(HW_ERR "TSC: %llu\n", m->tsc);
if (!fam_ops) /* Doesn't matter which member to test. */
if (!fam_ops.mc0_mce)
goto err_code; goto err_code;
switch (m->bank) { switch (m->bank) {
...@@ -1157,80 +1186,73 @@ static int __init mce_amd_init(void) ...@@ -1157,80 +1186,73 @@ static int __init mce_amd_init(void)
c->x86_vendor != X86_VENDOR_HYGON) c->x86_vendor != X86_VENDOR_HYGON)
return -ENODEV; return -ENODEV;
fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); if (boot_cpu_has(X86_FEATURE_SMCA)) {
if (!fam_ops) xec_mask = 0x3f;
return -ENOMEM; goto out;
}
switch (c->x86) { switch (c->x86) {
case 0xf: case 0xf:
fam_ops->mc0_mce = k8_mc0_mce; fam_ops.mc0_mce = k8_mc0_mce;
fam_ops->mc1_mce = k8_mc1_mce; fam_ops.mc1_mce = k8_mc1_mce;
fam_ops->mc2_mce = k8_mc2_mce; fam_ops.mc2_mce = k8_mc2_mce;
break; break;
case 0x10: case 0x10:
fam_ops->mc0_mce = f10h_mc0_mce; fam_ops.mc0_mce = f10h_mc0_mce;
fam_ops->mc1_mce = k8_mc1_mce; fam_ops.mc1_mce = k8_mc1_mce;
fam_ops->mc2_mce = k8_mc2_mce; fam_ops.mc2_mce = k8_mc2_mce;
break; break;
case 0x11: case 0x11:
fam_ops->mc0_mce = k8_mc0_mce; fam_ops.mc0_mce = k8_mc0_mce;
fam_ops->mc1_mce = k8_mc1_mce; fam_ops.mc1_mce = k8_mc1_mce;
fam_ops->mc2_mce = k8_mc2_mce; fam_ops.mc2_mce = k8_mc2_mce;
break; break;
case 0x12: case 0x12:
fam_ops->mc0_mce = f12h_mc0_mce; fam_ops.mc0_mce = f12h_mc0_mce;
fam_ops->mc1_mce = k8_mc1_mce; fam_ops.mc1_mce = k8_mc1_mce;
fam_ops->mc2_mce = k8_mc2_mce; fam_ops.mc2_mce = k8_mc2_mce;
break; break;
case 0x14: case 0x14:
fam_ops->mc0_mce = cat_mc0_mce; fam_ops.mc0_mce = cat_mc0_mce;
fam_ops->mc1_mce = cat_mc1_mce; fam_ops.mc1_mce = cat_mc1_mce;
fam_ops->mc2_mce = k8_mc2_mce; fam_ops.mc2_mce = k8_mc2_mce;
break; break;
case 0x15: case 0x15:
xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f; xec_mask = c->x86_model == 0x60 ? 0x3f : 0x1f;
fam_ops->mc0_mce = f15h_mc0_mce; fam_ops.mc0_mce = f15h_mc0_mce;
fam_ops->mc1_mce = f15h_mc1_mce; fam_ops.mc1_mce = f15h_mc1_mce;
fam_ops->mc2_mce = f15h_mc2_mce; fam_ops.mc2_mce = f15h_mc2_mce;
break; break;
case 0x16: case 0x16:
xec_mask = 0x1f; xec_mask = 0x1f;
fam_ops->mc0_mce = cat_mc0_mce; fam_ops.mc0_mce = cat_mc0_mce;
fam_ops->mc1_mce = cat_mc1_mce; fam_ops.mc1_mce = cat_mc1_mce;
fam_ops->mc2_mce = f16h_mc2_mce; fam_ops.mc2_mce = f16h_mc2_mce;
break; break;
case 0x17: case 0x17:
case 0x18: case 0x18:
xec_mask = 0x3f; pr_warn("Decoding supported only on Scalable MCA processors.\n");
if (!boot_cpu_has(X86_FEATURE_SMCA)) { return -EINVAL;
printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
goto err_out;
}
break;
default: default:
printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86); printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
goto err_out; return -EINVAL;
} }
out:
pr_info("MCE: In-kernel MCE decoding enabled.\n"); pr_info("MCE: In-kernel MCE decoding enabled.\n");
mce_register_decode_chain(&amd_mce_dec_nb); mce_register_decode_chain(&amd_mce_dec_nb);
return 0; return 0;
err_out:
kfree(fam_ops);
fam_ops = NULL;
return -EINVAL;
} }
early_initcall(mce_amd_init); early_initcall(mce_amd_init);
...@@ -1238,7 +1260,6 @@ early_initcall(mce_amd_init); ...@@ -1238,7 +1260,6 @@ early_initcall(mce_amd_init);
static void __exit mce_amd_exit(void) static void __exit mce_amd_exit(void)
{ {
mce_unregister_decode_chain(&amd_mce_dec_nb); mce_unregister_decode_chain(&amd_mce_dec_nb);
kfree(fam_ops);
} }
MODULE_DESCRIPTION("AMD MCE decoder"); MODULE_DESCRIPTION("AMD MCE decoder");
......
...@@ -549,6 +549,7 @@ ...@@ -549,6 +549,7 @@
#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb
#define PCI_DEVICE_ID_AMD_17H_M30H_DF_F3 0x1493 #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F3 0x1493
#define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443 #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443
#define PCI_DEVICE_ID_AMD_19H_DF_F3 0x1653
#define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703
#define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE 0x2000
#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment