#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/config.h>
#include <linux/irq.h>

#include <asm/processor.h>
#include <asm/system.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

#ifdef CONFIG_X86_MCE

static int mce_disabled __initdata = 0;

static int banks;

/*
 * If we get an MCE, we don't know what state the caches/TLBs are
 * going to be in, so we throw them all away.
 */
static inline void flush_all (void)
{
	__asm__ __volatile__ ("invd": : );
	__flush_tlb();
}

/*
 * P4/Xeon Thermal transition interrupt handler
 */
#ifdef CONFIG_X86_LOCAL_APIC
static void intel_thermal_interrupt(struct pt_regs *regs)
{
	u32 l, h;
	unsigned int cpu = smp_processor_id();

	ack_APIC_irq();

	rdmsr(MSR_IA32_THERM_STATUS, l, h);
	if (l & 1) {
		printk(KERN_EMERG "CPU#%d: Temperature above threshold\n", cpu);
		printk(KERN_EMERG "CPU#%d: Running in modulated clock mode\n", cpu);
	} else {
		printk(KERN_INFO "CPU#%d: Temperature/speed normal\n", cpu);
	}
}
#endif

static void unexpected_thermal_interrupt(struct pt_regs *regs)
{
	printk(KERN_ERR "CPU#%d: Unexpected LVT TMR interrupt!\n", smp_processor_id());
}

/*
 * Thermal interrupt handler for this CPU setup
 */
static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;

asmlinkage void smp_thermal_interrupt(struct pt_regs regs)
{
	vendor_thermal_interrupt(&regs);
}

/* P4/Xeon Thermal regulation detect and init */
static void __init intel_init_thermal(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_LOCAL_APIC
	u32 l, h;
	unsigned int cpu = smp_processor_id();

	/* Thermal monitoring */
	if (!test_bit(X86_FEATURE_ACPI, c->x86_capability))
		return;	/* -ENODEV */

	/* Clock modulation */
	if (!test_bit(X86_FEATURE_ACC, c->x86_capability))
		return;	/* -ENODEV */

	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
	/* First check if it's enabled already, in which case there might
	 * be some SMM goo which handles it, so we can't even put a handler
	 * since it might be delivered via SMI already -zwanem.
	 */
	if (l & (1<<3)) {
		printk(KERN_DEBUG "CPU#%d: Thermal monitoring already enabled\n", cpu);
	} else {
		wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h);
		printk(KERN_INFO "CPU#%d: Thermal monitoring enabled\n", cpu);
	}

	/* Check whether a vector already exists */
	l = apic_read(APIC_LVTTHMR);
	if (l & 0xff) {
		printk(KERN_DEBUG "CPU#%d: Thermal LVT already handled\n", cpu);
		return;	/* -EBUSY */
	}

	/* The temperature transition interrupt handler setup */
	l = THERMAL_APIC_VECTOR;	/* our delivery vector */
	l |= (APIC_DM_FIXED | APIC_LVT_MASKED);	/* we'll mask till we're ready */
	apic_write_around(APIC_LVTTHMR, l);

	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
	wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x3, h);	/* enable both threshold interrupts */

	/* ok we're good to go... */
	vendor_thermal_interrupt = intel_thermal_interrupt;

	l = apic_read(APIC_LVTTHMR);
	apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
	return;
#endif
}
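/*
 * For reference when reading intel_machine_check() below: the bit
 * tests follow the IA-32 machine-check architecture as documented
 * in the Intel manuals.
 *
 * MCG_STATUS:	bit 0	RIPV  - the restart IP on the stack is valid
 *		bit 1	EIPV  - the IP on the stack relates to the error
 *		bit 2	MCIP  - a machine check is in progress
 *
 * MCi_STATUS ('high' below holds bits 63-32):
 *		bit 63	VAL   - this bank holds a valid error
 *		bit 61	UC    - the error was not corrected by hardware
 *		bit 59	MISCV - the MISC register holds extra information
 *		bit 58	ADDRV - the ADDR register holds the error address
 *		bit 57	PCC   - processor context may be corrupt
 */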
/*
 * Machine Check Handler For PII/PIII
 */
static void intel_machine_check(struct pt_regs * regs, long error_code)
{
	int recover = 1;
	u32 alow, ahigh, high, low;
	u32 mcgstl, mcgsth;
	int i;

	flush_all();
	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
	if (mcgstl & (1<<0))	/* Recoverable ? */
		recover = 0;

	printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl);

	for (i = 0; i < banks; i++) {
		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
		if (high & (1<<31)) {
			if (high & (1<<29))
				recover |= 1;
			if (high & (1<<25))
				recover |= 2;
			printk(KERN_EMERG "Bank %d: %08x%08x", i, high, low);
			high &= ~(1<<31);
			if (high & (1<<27)) {
				rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
				printk("[%08x%08x]", ahigh, alow);
			}
			if (high & (1<<26)) {
				rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
				printk(" at %08x%08x", ahigh, alow);
			}
			printk("\n");
			/* Clear it */
			wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
			/* Serialize */
			wmb();
		}
	}

	if (recover & 2)
		panic("CPU context corrupt");
	if (recover & 1)
		panic("Unable to continue");
	printk(KERN_EMERG "Attempting to continue.\n");
	mcgstl &= ~(1<<2);	/* Clear MCIP */
	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
}

/*
 * Machine check handler for Pentium class Intel
 */
static void pentium_machine_check(struct pt_regs * regs, long error_code)
{
	u32 loaddr, hi, lotype;

	rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
	rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
	printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype);
	if (lotype & (1<<5))
		printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id());
}

/*
 * Machine check handler for WinChip C6
 */
static void winchip_machine_check(struct pt_regs * regs, long error_code)
{
	printk(KERN_EMERG "CPU#%d: Machine Check Exception.\n", smp_processor_id());
}

/*
 * Handle unconfigured int18 (should never happen)
 */
static void unexpected_machine_check(struct pt_regs * regs, long error_code)
{
	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
}

/*
 * Call the installed machine check handler for this CPU setup.
 */
static void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;

asmlinkage void do_machine_check(struct pt_regs * regs, long error_code)
{
	machine_check_vector(regs, error_code);
}

#ifdef CONFIG_X86_MCE_NONFATAL
static struct timer_list mce_timer;

static void mce_checkregs (void *info)
{
	u32 low, high;
	int i;

	for (i = 0; i < banks; i++) {
		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);

		if ((low | high) != 0) {
			flush_all();
			printk(KERN_EMERG "MCE: The hardware reports a non-fatal, correctable incident occurred on CPU %d.\n", smp_processor_id());
			printk(KERN_EMERG "Bank %d: %08x%08x\n", i, high, low);

			/* Scrub the error so we don't pick it up in 5 seconds time. */
			wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
			/* Serialize */
			wmb();
		}
	}
}

static void mce_timerfunc (unsigned long data)
{
	/* Check the banks on this CPU, then have every other CPU do the
	 * same: smp_call_function() runs the handler on all CPUs except
	 * the current one. */
	mce_checkregs(NULL);
	smp_call_function(mce_checkregs, NULL, 1, 1);

	/* Refresh the timer. */
	mce_timer.expires = jiffies + 5 * HZ;
	add_timer(&mce_timer);
}
#endif
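/*
 * Note on the MSR arithmetic above and below: each machine-check
 * report bank owns four consecutive MSRs (CTL, STATUS, ADDR and
 * MISC, in that order), so bank i is reached from the bank 0
 * registers with a stride of 4, e.g. MSR_IA32_MC0_STATUS + i*4.
 */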
/*
 * Set up machine check reporting for processors with Intel style MCE
 */
static void __init intel_mcheck_init(struct cpuinfo_x86 *c)
{
	u32 l, h;
	int i;
	static int done;

	/*
	 * Check for MCE support
	 */
	if (!test_bit(X86_FEATURE_MCE, c->x86_capability))
		return;

	/*
	 * Pentium machine check
	 */
	if (c->x86 == 5) {
		/* Default P5 to off as it's often misconnected */
		if (mce_disabled != -1)
			return;
		machine_check_vector = pentium_machine_check;
		wmb();

		/* Read registers before enabling */
		rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
		rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
		if (done == 0)
			printk(KERN_INFO "Intel old style machine check architecture supported.\n");

		/* Enable MCE */
		set_in_cr4(X86_CR4_MCE);
		printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
		return;
	}

	/*
	 * Check for PPro style MCA
	 */
	if (!test_bit(X86_FEATURE_MCA, c->x86_capability))
		return;

	/* Ok machine check is available */
	machine_check_vector = intel_machine_check;
	wmb();

	if (done == 0)
		printk(KERN_INFO "Intel machine check architecture supported.\n");
	rdmsr(MSR_IA32_MCG_CAP, l, h);
	if (l & (1<<8))	/* Control register present ? */
		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
	banks = l & 0xff;

	/* Don't enable bank 0 on Intel P6 cores, it goes bang quickly. */
	if (c->x86_vendor == X86_VENDOR_INTEL && c->x86 == 6) {
		for (i = 1; i < banks; i++)
			wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
	} else {
		for (i = 0; i < banks; i++)
			wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
	}

	for (i = 0; i < banks; i++)
		wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);

	set_in_cr4(X86_CR4_MCE);
	printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", smp_processor_id());

	intel_init_thermal(c);
	done = 1;
}

/*
 * Set up machine check reporting on the Winchip C6 series
 */
static void __init winchip_mcheck_init(struct cpuinfo_x86 *c)
{
	u32 lo, hi;

	/* Not supported on C3 */
	if (c->x86 != 5)
		return;

	/* Winchip C6 */
	machine_check_vector = winchip_machine_check;
	wmb();
	rdmsr(MSR_IDT_FCR1, lo, hi);
	lo |= (1<<2);	/* Enable EIERRINT (int 18 MCE) */
	lo &= ~(1<<4);	/* Enable MCE */
	wrmsr(MSR_IDT_FCR1, lo, hi);
	set_in_cr4(X86_CR4_MCE);
	printk(KERN_INFO "Winchip machine check reporting enabled on CPU#%d.\n", smp_processor_id());
}

/*
 * This has to be run for each processor
 */
void __init mcheck_init(struct cpuinfo_x86 *c)
{
	if (mce_disabled == 1)
		return;

	switch (c->x86_vendor) {
	case X86_VENDOR_AMD:
		/* AMD K7 machine check is Intel like */
		if (c->x86 == 6) {
			intel_mcheck_init(c);
#ifdef CONFIG_X86_MCE_NONFATAL
			/* Set the timer to check for non-fatal errors every 5 seconds */
			init_timer(&mce_timer);
			mce_timer.expires = jiffies + 5 * HZ;
			mce_timer.data = 0;
			mce_timer.function = &mce_timerfunc;
			add_timer(&mce_timer);
#endif
		}
		break;
	case X86_VENDOR_INTEL:
		intel_mcheck_init(c);
		break;
	case X86_VENDOR_CENTAUR:
		winchip_mcheck_init(c);
		break;
	default:
		break;
	}
}

static int __init mcheck_disable(char *str)
{
	mce_disabled = 1;
	return 0;
}

static int __init mcheck_enable(char *str)
{
	mce_disabled = -1;
	return 0;
}

__setup("nomce", mcheck_disable);
__setup("mce", mcheck_enable);

#else
asmlinkage void do_machine_check(struct pt_regs * regs, long error_code) {}
asmlinkage void smp_thermal_interrupt(struct pt_regs regs) {}
void __init mcheck_init(struct cpuinfo_x86 *c) {}
#endif
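/*
 * Boot options: "nomce" sets mce_disabled to 1 and switches all of the
 * reporting above off; "mce" sets it to -1, which is also the only way
 * to get reporting on a plain Pentium, since the P5 path in
 * intel_mcheck_init() stays off unless it was explicitly requested.
 */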