Commit 17723e94, authored by Todd Inglett, committed by Anton Blanchard

ppc64: revised machine check exception handler

parent 0ff2df7e
@@ -151,61 +151,87 @@ SystemResetException(struct pt_regs *regs)
/* What should we do here? We could issue a shutdown or hard reset. */ /* What should we do here? We could issue a shutdown or hard reset. */
} }
static int power4_handle_mce(struct pt_regs *regs) /*
* See if we can recover from a machine check exception.
* This is only called on power4 (or above) and only via
* the Firmware Non-Maskable Interrupts (fwnmi) handler
* which provides the error analysis for us.
*
* Return 1 if corrected (or delivered a signal).
* Return 0 if there is nothing we can do.
*/
static int recover_mce(struct pt_regs *regs, struct rtas_error_log err)
{ {
siginfo_t info;
if (err.disposition == DISP_FULLY_RECOVERED) {
/* Platform corrected itself */
return 1;
} else if ((regs->msr & MSR_RI) &&
user_mode(regs) &&
err.severity == SEVERITY_ERROR_SYNC &&
err.disposition == DISP_NOT_RECOVERED &&
err.target == TARGET_MEMORY &&
err.type == TYPE_ECC_UNCORR &&
!(current->pid == 0 || current->pid == 1)) {
/* Kill off a user process with an ECC error */
info.si_signo = SIGBUS;
info.si_errno = 0;
/* XXX something better for ECC error? */
info.si_code = BUS_ADRERR;
info.si_addr = (void *)regs->nip;
printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n",
current->pid);
_exception(SIGBUS, &info, regs);
return 1;
}
return 0; return 0;
} }
/*
* Handle a machine check.
*
* Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
* should be present. If so the handler which called us tells us if the
* error was recovered (never true if RI=0).
*
* On hardware prior to Power 4 these exceptions were asynchronous which
* means we can't tell exactly where it occurred and so we can't recover.
*
* Note that the debugger should test RI=0 and warn the user that system
* state has been corrupted.
*/
void void
MachineCheckException(struct pt_regs *regs) MachineCheckException(struct pt_regs *regs)
{ {
siginfo_t info; struct rtas_error_log err, *errp;
if (fwnmi_active) { if (fwnmi_active) {
struct rtas_error_log *errhdr = FWNMI_get_errinfo(regs); errp = FWNMI_get_errinfo(regs);
if (errhdr) { if (errp)
/* ToDo: attempt to recover from some errors here */ err = *errp;
} FWNMI_release_errinfo(); /* frees errp */
FWNMI_release_errinfo(); if (errp && recover_mce(regs, err))
return;
} }
if (!user_mode(regs)) {
/* Attempt to recover if the interrupt is recoverable */
if (regs->msr & MSR_RI) {
if ((__is_processor(PV_POWER4) ||
__is_processor(PV_POWER4p)) &&
power4_handle_mce(regs))
return;
}
#ifdef CONFIG_DEBUG_KERNEL #ifdef CONFIG_DEBUG_KERNEL
if (debugger_fault_handler) { if (debugger_fault_handler) {
debugger_fault_handler(regs); debugger_fault_handler(regs);
return; return;
}
if (debugger)
debugger(regs);
#endif
console_verbose();
spin_lock_irq(&die_lock);
bust_spinlocks(1);
printk("Machine check in kernel mode.\n");
printk("Caused by (from SRR1=%lx): ", regs->msr);
show_regs(regs);
bust_spinlocks(0);
spin_unlock_irq(&die_lock);
panic("Unrecoverable Machine Check");
} }
if (debugger)
/* debugger(regs);
* XXX we should check RI bit on exception exit and kill the #endif
* task if it was cleared console_verbose();
*/ spin_lock_irq(&die_lock);
info.si_signo = SIGBUS; bust_spinlocks(1);
info.si_errno = 0; printk("Machine check in kernel mode.\n");
info.si_code = BUS_ADRERR; printk("Caused by (from SRR1=%lx): ", regs->msr);
info.si_addr = (void *)regs->nip; show_regs(regs);
_exception(SIGSEGV, &info, regs); bust_spinlocks(0);
spin_unlock_irq(&die_lock);
panic("Unrecoverable Machine Check");
} }
void void
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment