Commit 37d43acf authored by Yazen Ghannam's avatar Yazen Ghannam Committed by Thomas Gleixner

x86/mce/AMD: Redo error logging from APIC LVT interrupt handlers

We have support for the new SMCA MCA_DE{STAT,ADDR} registers in Linux.
So we've used these registers in place of MCA_{STATUS,ADDR} on SMCA
systems.

However, the guidance for current SMCA implementations of is to continue
using MCA_{STATUS,ADDR} and to use MCA_DE{STAT,ADDR} only if a Deferred
error was not found in the former registers. If we logged a Deferred
error in MCA_STATUS then we should also clear MCA_DESTAT. This also
means we shouldn't clear MCA_CONFIG[LogDeferredInMcaStat].

Rework __log_error() to only log an error and add helpers for the
different error types being logged from the corresponding interrupt
handlers.

Boris: carve out common functionality into a _log_error_bank(). Cleanup
comments, check MCi_STATUS bits before reading MSRs. Streamline flow.
Signed-off-by: default avatarYazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: default avatarBorislav Petkov <bp@suse.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: http://lkml.kernel.org/r/1493147772-2721-1-git-send-email-Yazen.Ghannam@amd.comSigned-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
parent 473e90b2
...@@ -471,20 +471,6 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, ...@@ -471,20 +471,6 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr,
*/ */
smca_high |= BIT(0); smca_high |= BIT(0);
/*
* SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
* registers with the option of additionally logging to
* MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
*
* This bit is usually set by BIOS to retain the old behavior
* for OSes that don't use the new registers. Linux supports the
* new registers so let's disable that additional logging here.
*
* MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
* portion of the MSR).
*/
smca_high &= ~BIT(2);
/* /*
* SMCA sets the Deferred Error Interrupt type per bank. * SMCA sets the Deferred Error Interrupt type per bank.
* *
...@@ -755,37 +741,19 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) ...@@ -755,37 +741,19 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
} }
EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr); EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr);
static void static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
{ {
u32 msr_status = msr_ops.status(bank);
u32 msr_addr = msr_ops.addr(bank);
struct mce m; struct mce m;
u64 status;
WARN_ON_ONCE(deferred_err && threshold_err);
if (deferred_err && mce_flags.smca) {
msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
}
rdmsrl(msr_status, status);
if (!(status & MCI_STATUS_VAL))
return;
mce_setup(&m); mce_setup(&m);
m.status = status; m.status = status;
m.misc = misc;
m.bank = bank; m.bank = bank;
m.tsc = rdtsc(); m.tsc = rdtsc();
if (threshold_err)
m.misc = misc;
if (m.status & MCI_STATUS_ADDRV) { if (m.status & MCI_STATUS_ADDRV) {
rdmsrl(msr_addr, m.addr); m.addr = addr;
/* /*
* Extract [55:<lsb>] where lsb is the least significant * Extract [55:<lsb>] where lsb is the least significant
...@@ -806,8 +774,6 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) ...@@ -806,8 +774,6 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
} }
mce_log(&m); mce_log(&m);
wrmsrl(msr_status, 0);
} }
static inline void __smp_deferred_error_interrupt(void) static inline void __smp_deferred_error_interrupt(void)
...@@ -832,45 +798,85 @@ asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void) ...@@ -832,45 +798,85 @@ asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void)
exiting_ack_irq(); exiting_ack_irq();
} }
/* APIC interrupt handler for deferred errors */ /*
static void amd_deferred_error_interrupt(void) * Returns true if the logged error is deferred. False, otherwise.
*/
static inline bool
_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
{ {
unsigned int bank; u64 status, addr = 0;
u32 msr_status;
u64 status;
for (bank = 0; bank < mca_cfg.banks; ++bank) { rdmsrl(msr_stat, status);
msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank) if (!(status & MCI_STATUS_VAL))
: msr_ops.status(bank); return false;
rdmsrl(msr_status, status); if (status & MCI_STATUS_ADDRV)
rdmsrl(msr_addr, addr);
if (!(status & MCI_STATUS_VAL) || __log_error(bank, status, addr, misc);
!(status & MCI_STATUS_DEFERRED))
continue;
__log_error(bank, true, false, 0); wrmsrl(status, 0);
break;
} return status & MCI_STATUS_DEFERRED;
} }
/* /*
* APIC Interrupt Handler * We have three scenarios for checking for Deferred errors:
*
* 1) Non-SMCA systems check MCA_STATUS and log error if found.
* 2) SMCA systems check MCA_STATUS. If error is found then log it and also
* clear MCA_DESTAT.
* 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
* log it.
*/ */
static void log_error_deferred(unsigned int bank)
{
bool defrd;
/* defrd = _log_error_bank(bank, msr_ops.status(bank),
* threshold interrupt handler will service THRESHOLD_APIC_VECTOR. msr_ops.addr(bank), 0);
* the interrupt goes off when error_count reaches threshold_limit.
* the handler will simply log mcelog w/ software defined bank number. if (!mce_flags.smca)
return;
/* Clear MCA_DESTAT if we logged the deferred error from MCA_STATUS. */
if (defrd) {
wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
return;
}
/*
* Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
* for a valid error.
*/ */
_log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank),
MSR_AMD64_SMCA_MCx_DEADDR(bank), 0);
}
/* APIC interrupt handler for deferred errors */
static void amd_deferred_error_interrupt(void)
{
unsigned int bank;
for (bank = 0; bank < mca_cfg.banks; ++bank)
log_error_deferred(bank);
}
static void log_error_thresholding(unsigned int bank, u64 misc)
{
_log_error_bank(bank, msr_ops.status(bank), msr_ops.addr(bank), misc);
}
/*
* Threshold interrupt handler will service THRESHOLD_APIC_VECTOR. The interrupt
* goes off when error_count reaches threshold_limit.
*/
static void amd_threshold_interrupt(void) static void amd_threshold_interrupt(void)
{ {
u32 low = 0, high = 0, address = 0; u32 low = 0, high = 0, address = 0;
unsigned int bank, block, cpu = smp_processor_id(); unsigned int bank, block, cpu = smp_processor_id();
struct thresh_restart tr; struct thresh_restart tr;
/* assume first bank caused it */
for (bank = 0; bank < mca_cfg.banks; ++bank) { for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (!(per_cpu(bank_map, cpu) & (1 << bank))) if (!(per_cpu(bank_map, cpu) & (1 << bank)))
continue; continue;
...@@ -893,23 +899,18 @@ static void amd_threshold_interrupt(void) ...@@ -893,23 +899,18 @@ static void amd_threshold_interrupt(void)
(high & MASK_LOCKED_HI)) (high & MASK_LOCKED_HI))
continue; continue;
/* if (!(high & MASK_OVERFLOW_HI))
* Log the machine check that caused the threshold continue;
* event.
*/
if (high & MASK_OVERFLOW_HI)
goto log;
}
}
return;
log: /* Log the MCE which caused the threshold event. */
__log_error(bank, false, true, ((u64)high << 32) | low); log_error_thresholding(bank, ((u64)high << 32) | low);
/* Reset threshold block after logging error. */ /* Reset threshold block after logging error. */
memset(&tr, 0, sizeof(tr)); memset(&tr, 0, sizeof(tr));
tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block]; tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block];
threshold_restart_bank(&tr); threshold_restart_bank(&tr);
}
}
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment