Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
L
linux
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
linux
Commits
1d991001
Commit
1d991001
authored
Jun 20, 2009
by
Ingo Molnar
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'x86/mce3' into x86/urgent
parents
bc3f5d3d
b1f49f95
Changes
16
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
528 additions
and
508 deletions
+528
-508
arch/x86/include/asm/mce.h
arch/x86/include/asm/mce.h
+51
-12
arch/x86/include/asm/therm_throt.h
arch/x86/include/asm/therm_throt.h
+0
-9
arch/x86/kernel/cpu/mcheck/Makefile
arch/x86/kernel/cpu/mcheck/Makefile
+5
-4
arch/x86/kernel/cpu/mcheck/k7.c
arch/x86/kernel/cpu/mcheck/k7.c
+1
-2
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/cpu/mcheck/mce.c
+158
-73
arch/x86/kernel/cpu/mcheck/mce.h
arch/x86/kernel/cpu/mcheck/mce.h
+0
-38
arch/x86/kernel/cpu/mcheck/mce_amd.c
arch/x86/kernel/cpu/mcheck/mce_amd.c
+0
-0
arch/x86/kernel/cpu/mcheck/mce_intel.c
arch/x86/kernel/cpu/mcheck/mce_intel.c
+200
-54
arch/x86/kernel/cpu/mcheck/mce_intel_64.c
arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+0
-248
arch/x86/kernel/cpu/mcheck/non-fatal.c
arch/x86/kernel/cpu/mcheck/non-fatal.c
+1
-2
arch/x86/kernel/cpu/mcheck/p4.c
arch/x86/kernel/cpu/mcheck/p4.c
+1
-47
arch/x86/kernel/cpu/mcheck/p5.c
arch/x86/kernel/cpu/mcheck/p5.c
+6
-9
arch/x86/kernel/cpu/mcheck/p6.c
arch/x86/kernel/cpu/mcheck/p6.c
+1
-2
arch/x86/kernel/cpu/mcheck/therm_throt.c
arch/x86/kernel/cpu/mcheck/therm_throt.c
+102
-4
arch/x86/kernel/cpu/mcheck/winchip.c
arch/x86/kernel/cpu/mcheck/winchip.c
+1
-2
arch/x86/kernel/traps.c
arch/x86/kernel/traps.c
+1
-2
No files found.
arch/x86/include/asm/mce.h
View file @
1d991001
...
...
@@ -102,15 +102,39 @@ struct mce_log {
#ifdef __KERNEL__
#include <linux/percpu.h>
#include <linux/init.h>
#include <asm/atomic.h>
extern
int
mce_disabled
;
extern
int
mce_p5_enabled
;
#include <asm/atomic.h>
#include <linux/percpu.h>
#ifdef CONFIG_X86_MCE
void
mcheck_init
(
struct
cpuinfo_x86
*
c
);
#else
static
inline
void
mcheck_init
(
struct
cpuinfo_x86
*
c
)
{}
#endif
#ifdef CONFIG_X86_OLD_MCE
extern
int
nr_mce_banks
;
void
amd_mcheck_init
(
struct
cpuinfo_x86
*
c
);
void
intel_p4_mcheck_init
(
struct
cpuinfo_x86
*
c
);
void
intel_p6_mcheck_init
(
struct
cpuinfo_x86
*
c
);
#endif
#ifdef CONFIG_X86_ANCIENT_MCE
void
intel_p5_mcheck_init
(
struct
cpuinfo_x86
*
c
);
void
winchip_mcheck_init
(
struct
cpuinfo_x86
*
c
);
static
inline
void
enable_p5_mce
(
void
)
{
mce_p5_enabled
=
1
;
}
#else
static
inline
void
intel_p5_mcheck_init
(
struct
cpuinfo_x86
*
c
)
{}
static
inline
void
winchip_mcheck_init
(
struct
cpuinfo_x86
*
c
)
{}
static
inline
void
enable_p5_mce
(
void
)
{}
#endif
void
mce_setup
(
struct
mce
*
m
);
void
mce_log
(
struct
mce
*
m
);
DECLARE_PER_CPU
(
struct
sys_device
,
mce_dev
);
extern
void
(
*
threshold_cpu_callback
)(
unsigned
long
action
,
unsigned
int
cpu
);
/*
* To support more than 128 would need to escape the predefined
...
...
@@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c);
DECLARE_PER_CPU
(
unsigned
,
mce_exception_count
);
DECLARE_PER_CPU
(
unsigned
,
mce_poll_count
);
void
mce_log_therm_throt_event
(
__u64
status
);
extern
atomic_t
mce_entry
;
void
do_machine_check
(
struct
pt_regs
*
,
long
);
typedef
DECLARE_BITMAP
(
mce_banks_t
,
MAX_NR_BANKS
);
DECLARE_PER_CPU
(
mce_banks_t
,
mce_poll_banks
);
...
...
@@ -167,13 +187,32 @@ void mce_notify_process(void);
DECLARE_PER_CPU
(
struct
mce
,
injectm
);
extern
struct
file_operations
mce_chrdev_ops
;
#ifdef CONFIG_X86_MCE
void
mcheck_init
(
struct
cpuinfo_x86
*
c
);
#else
#define mcheck_init(c) do { } while (0)
#endif
/*
* Exception handler
*/
/* Call the installed machine check handler for this CPU setup. */
extern
void
(
*
machine_check_vector
)(
struct
pt_regs
*
,
long
error_code
);
void
do_machine_check
(
struct
pt_regs
*
,
long
);
/*
* Threshold handler
*/
extern
void
(
*
mce_threshold_vector
)(
void
);
extern
void
(
*
threshold_cpu_callback
)(
unsigned
long
action
,
unsigned
int
cpu
);
/*
* Thermal handler
*/
void
intel_init_thermal
(
struct
cpuinfo_x86
*
c
);
#ifdef CONFIG_X86_NEW_MCE
void
mce_log_therm_throt_event
(
__u64
status
);
#else
static
inline
void
mce_log_therm_throt_event
(
__u64
status
)
{}
#endif
#endif
/* __KERNEL__ */
#endif
/* _ASM_X86_MCE_H */
arch/x86/include/asm/therm_throt.h
deleted
100644 → 0
View file @
bc3f5d3d
#ifndef _ASM_X86_THERM_THROT_H
#define _ASM_X86_THERM_THROT_H
#include <asm/atomic.h>
extern
atomic_t
therm_throt_en
;
int
therm_throt_process
(
int
curr
);
#endif
/* _ASM_X86_THERM_THROT_H */
arch/x86/kernel/cpu/mcheck/Makefile
View file @
1d991001
obj-y
=
mce.o
therm_throt.o
obj-y
=
mce.o
obj-$(CONFIG_X86_NEW_MCE)
+=
mce-severity.o
obj-$(CONFIG_X86_OLD_MCE)
+=
k7.o p4.o p6.o
obj-$(CONFIG_X86_ANCIENT_MCE)
+=
winchip.o p5.o
obj-$(CONFIG_X86_MCE_P4THERMAL)
+=
mce_intel.o
obj-$(CONFIG_X86_MCE_INTEL)
+=
mce_intel_64.o mce_intel.o
obj-$(CONFIG_X86_MCE_AMD)
+=
mce_amd_64.o
obj-$(CONFIG_X86_MCE_INTEL)
+=
mce_intel.o
obj-$(CONFIG_X86_MCE_AMD)
+=
mce_amd.o
obj-$(CONFIG_X86_MCE_NONFATAL)
+=
non-fatal.o
obj-$(CONFIG_X86_MCE_THRESHOLD)
+=
threshold.o
obj-$(CONFIG_X86_MCE_INJECT)
+=
mce-inject.o
obj-$(CONFIG_X86_THERMAL_VECTOR)
+=
therm_throt.o
arch/x86/kernel/cpu/mcheck/k7.c
View file @
1d991001
...
...
@@ -10,10 +10,9 @@
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include "mce.h"
/* Machine Check Handler For AMD Athlon/Duron: */
static
void
k7_machine_check
(
struct
pt_regs
*
regs
,
long
error_code
)
{
...
...
arch/x86/kernel/cpu/mcheck/mce.c
View file @
1d991001
This diff is collapsed.
Click to expand it.
arch/x86/kernel/cpu/mcheck/mce.h
deleted
100644 → 0
View file @
bc3f5d3d
#include <linux/init.h>
#include <asm/mce.h>
#ifdef CONFIG_X86_OLD_MCE
void
amd_mcheck_init
(
struct
cpuinfo_x86
*
c
);
void
intel_p4_mcheck_init
(
struct
cpuinfo_x86
*
c
);
void
intel_p6_mcheck_init
(
struct
cpuinfo_x86
*
c
);
#endif
#ifdef CONFIG_X86_ANCIENT_MCE
void
intel_p5_mcheck_init
(
struct
cpuinfo_x86
*
c
);
void
winchip_mcheck_init
(
struct
cpuinfo_x86
*
c
);
extern
int
mce_p5_enable
;
static
inline
int
mce_p5_enabled
(
void
)
{
return
mce_p5_enable
;
}
static
inline
void
enable_p5_mce
(
void
)
{
mce_p5_enable
=
1
;
}
#else
static
inline
void
intel_p5_mcheck_init
(
struct
cpuinfo_x86
*
c
)
{}
static
inline
void
winchip_mcheck_init
(
struct
cpuinfo_x86
*
c
)
{}
static
inline
int
mce_p5_enabled
(
void
)
{
return
0
;
}
static
inline
void
enable_p5_mce
(
void
)
{
}
#endif
/* Call the installed machine check handler for this CPU setup. */
extern
void
(
*
machine_check_vector
)(
struct
pt_regs
*
,
long
error_code
);
#ifdef CONFIG_X86_OLD_MCE
extern
int
nr_mce_banks
;
void
intel_set_thermal_handler
(
void
);
#else
static
inline
void
intel_set_thermal_handler
(
void
)
{
}
#endif
void
intel_init_thermal
(
struct
cpuinfo_x86
*
c
);
arch/x86/kernel/cpu/mcheck/mce_amd
_64
.c
→
arch/x86/kernel/cpu/mcheck/mce_amd.c
View file @
1d991001
File moved
arch/x86/kernel/cpu/mcheck/mce_intel.c
View file @
1d991001
/*
* Common code for Intel machine checks
* Intel specific MCE features.
* Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
* Copyright (C) 2008, 2009 Intel Corporation
* Author: Andi Kleen
*/
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <
asm/therm_thro
t.h>
#include <
asm/processor
.h>
#include <
asm/system
.h>
#include <
linux/ini
t.h>
#include <
linux/interrupt
.h>
#include <
linux/percpu
.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>
/*
* Support for Intel Correct Machine Check Interrupts. This allows
* the CPU to raise an interrupt when a corrected machine check happened.
* Normally we pick those up using a regular polling timer.
* Also supports reliable discovery of shared banks.
*/
#include "mce.h"
static
DEFINE_PER_CPU
(
mce_banks_t
,
mce_banks_owned
);
void
intel_init_thermal
(
struct
cpuinfo_x86
*
c
)
/*
* cmci_discover_lock protects against parallel discovery attempts
* which could race against each other.
*/
static
DEFINE_SPINLOCK
(
cmci_discover_lock
);
#define CMCI_THRESHOLD 1
static
int
cmci_supported
(
int
*
banks
)
{
unsigned
int
cpu
=
smp_processor_id
();
int
tm2
=
0
;
u32
l
,
h
;
u64
cap
;
if
(
mce_cmci_disabled
||
mce_ignore_ce
)
return
0
;
/*
* Thermal monitoring depends on ACPI, clock modulation
* and APIC as well
* Vendor check is not strictly needed, but the initial
* initialization is vendor keyed and this
* makes sure none of the backdoors are entered otherwise.
*/
if
(
!
cpu_has
(
c
,
X86_FEATURE_ACPI
)
||
!
cpu_has
(
c
,
X86_FEATURE_ACC
)
||
!
cpu_has
(
c
,
X86_FEATURE_APIC
))
{
pr_debug
(
"Thermal monitoring disabled
\n
"
);
return
;
if
(
boot_cpu_data
.
x86_vendor
!=
X86_VENDOR_INTEL
)
return
0
;
if
(
!
cpu_has_apic
||
lapic_get_maxlvt
()
<
6
)
return
0
;
rdmsrl
(
MSR_IA32_MCG_CAP
,
cap
);
*
banks
=
min_t
(
unsigned
,
MAX_NR_BANKS
,
cap
&
0xff
);
return
!!
(
cap
&
MCG_CMCI_P
);
}
/*
* The interrupt handler. This is called on every event.
* Just call the poller directly to log any events.
* This could in theory increase the threshold under high load,
* but doesn't for now.
*/
static
void
intel_threshold_interrupt
(
void
)
{
machine_check_poll
(
MCP_TIMESTAMP
,
&
__get_cpu_var
(
mce_banks_owned
));
mce_notify_irq
();
}
static
void
print_update
(
char
*
type
,
int
*
hdr
,
int
num
)
{
if
(
*
hdr
==
0
)
printk
(
KERN_INFO
"CPU %d MCA banks"
,
smp_processor_id
());
*
hdr
=
1
;
printk
(
KERN_CONT
" %s:%d"
,
type
,
num
);
}
/*
* Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
* on this CPU. Use the algorithm recommended in the SDM to discover shared
* banks.
*/
static
void
cmci_discover
(
int
banks
,
int
boot
)
{
unsigned
long
*
owned
=
(
void
*
)
&
__get_cpu_var
(
mce_banks_owned
);
unsigned
long
flags
;
int
hdr
=
0
;
int
i
;
spin_lock_irqsave
(
&
cmci_discover_lock
,
flags
);
for
(
i
=
0
;
i
<
banks
;
i
++
)
{
u64
val
;
if
(
test_bit
(
i
,
owned
))
continue
;
rdmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
/* Already owned by someone else? */
if
(
val
&
CMCI_EN
)
{
if
(
test_and_clear_bit
(
i
,
owned
)
||
boot
)
print_update
(
"SHD"
,
&
hdr
,
i
);
__clear_bit
(
i
,
__get_cpu_var
(
mce_poll_banks
));
continue
;
}
/*
* First check if its enabled already, in which case there might
* be some SMM goo which handles it, so we can't even put a handler
* since it might be delivered via SMI already:
val
|=
CMCI_EN
|
CMCI_THRESHOLD
;
wrmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
rdmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
/* Did the enable bit stick? -- the bank supports CMCI */
if
(
val
&
CMCI_EN
)
{
if
(
!
test_and_set_bit
(
i
,
owned
)
||
boot
)
print_update
(
"CMCI"
,
&
hdr
,
i
);
__clear_bit
(
i
,
__get_cpu_var
(
mce_poll_banks
));
}
else
{
WARN_ON
(
!
test_bit
(
i
,
__get_cpu_var
(
mce_poll_banks
)));
}
}
spin_unlock_irqrestore
(
&
cmci_discover_lock
,
flags
);
if
(
hdr
)
printk
(
KERN_CONT
"
\n
"
);
}
/*
* Just in case we missed an event during initialization check
* all the CMCI owned banks.
*/
rdmsr
(
MSR_IA32_MISC_ENABLE
,
l
,
h
);
h
=
apic_read
(
APIC_LVTTHMR
);
if
((
l
&
MSR_IA32_MISC_ENABLE_TM1
)
&&
(
h
&
APIC_DM_SMI
))
{
printk
(
KERN_DEBUG
"CPU%d: Thermal monitoring handled by SMI
\n
"
,
cpu
);
void
cmci_recheck
(
void
)
{
unsigned
long
flags
;
int
banks
;
if
(
!
mce_available
(
&
current_cpu_data
)
||
!
cmci_supported
(
&
banks
))
return
;
}
local_irq_save
(
flags
);
machine_check_poll
(
MCP_TIMESTAMP
,
&
__get_cpu_var
(
mce_banks_owned
));
local_irq_restore
(
flags
);
}
if
(
cpu_has
(
c
,
X86_FEATURE_TM2
)
&&
(
l
&
MSR_IA32_MISC_ENABLE_TM2
))
tm2
=
1
;
/*
* Disable CMCI on this CPU for all banks it owns when it goes down.
* This allows other CPUs to claim the banks on rediscovery.
*/
void
cmci_clear
(
void
)
{
unsigned
long
flags
;
int
i
;
int
banks
;
u64
val
;
/* Check whether a vector already exists */
if
(
h
&
APIC_VECTOR_MASK
)
{
printk
(
KERN_DEBUG
"CPU%d: Thermal LVT vector (%#x) already installed
\n
"
,
cpu
,
(
h
&
APIC_VECTOR_MASK
));
if
(
!
cmci_supported
(
&
banks
))
return
;
spin_lock_irqsave
(
&
cmci_discover_lock
,
flags
);
for
(
i
=
0
;
i
<
banks
;
i
++
)
{
if
(
!
test_bit
(
i
,
__get_cpu_var
(
mce_banks_owned
)))
continue
;
/* Disable CMCI */
rdmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
val
&=
~
(
CMCI_EN
|
CMCI_THRESHOLD_MASK
);
wrmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
__clear_bit
(
i
,
__get_cpu_var
(
mce_banks_owned
));
}
spin_unlock_irqrestore
(
&
cmci_discover_lock
,
flags
);
}
/* We'll mask the thermal vector in the lapic till we're ready: */
h
=
THERMAL_APIC_VECTOR
|
APIC_DM_FIXED
|
APIC_LVT_MASKED
;
apic_write
(
APIC_LVTTHMR
,
h
);
/*
* After a CPU went down cycle through all the others and rediscover
* Must run in process context.
*/
void
cmci_rediscover
(
int
dying
)
{
int
banks
;
int
cpu
;
cpumask_var_t
old
;
rdmsr
(
MSR_IA32_THERM_INTERRUPT
,
l
,
h
);
wrmsr
(
MSR_IA32_THERM_INTERRUPT
,
l
|
(
THERM_INT_LOW_ENABLE
|
THERM_INT_HIGH_ENABLE
),
h
);
if
(
!
cmci_supported
(
&
banks
))
return
;
if
(
!
alloc_cpumask_var
(
&
old
,
GFP_KERNEL
))
return
;
cpumask_copy
(
old
,
&
current
->
cpus_allowed
);
for_each_online_cpu
(
cpu
)
{
if
(
cpu
==
dying
)
continue
;
if
(
set_cpus_allowed_ptr
(
current
,
cpumask_of
(
cpu
)))
continue
;
/* Recheck banks in case CPUs don't all have the same */
if
(
cmci_supported
(
&
banks
))
cmci_discover
(
banks
,
0
);
}
intel_set_thermal_handler
();
set_cpus_allowed_ptr
(
current
,
old
);
free_cpumask_var
(
old
);
}
/*
* Reenable CMCI on this CPU in case a CPU down failed.
*/
void
cmci_reenable
(
void
)
{
int
banks
;
if
(
cmci_supported
(
&
banks
))
cmci_discover
(
banks
,
0
);
}
rdmsr
(
MSR_IA32_MISC_ENABLE
,
l
,
h
);
wrmsr
(
MSR_IA32_MISC_ENABLE
,
l
|
MSR_IA32_MISC_ENABLE_TM1
,
h
);
static
void
intel_init_cmci
(
void
)
{
int
banks
;
/* Unmask the thermal vector: */
l
=
apic_read
(
APIC_LVTTHMR
);
apic_write
(
APIC_LVTTHMR
,
l
&
~
APIC_LVT_MASKED
);
if
(
!
cmci_supported
(
&
banks
))
return
;
printk
(
KERN_INFO
"CPU%d: Thermal monitoring enabled (%s)
\n
"
,
cpu
,
tm2
?
"TM2"
:
"TM1"
);
mce_threshold_vector
=
intel_threshold_interrupt
;
cmci_discover
(
banks
,
1
);
/*
* For CPU #0 this runs with still disabled APIC, but that's
* ok because only the vector is set up. We still do another
* check for the banks later for CPU #0 just to make sure
* to not miss any events.
*/
apic_write
(
APIC_LVTCMCI
,
THRESHOLD_APIC_VECTOR
|
APIC_DM_FIXED
);
cmci_recheck
();
}
/* enable thermal throttle processing */
atomic_set
(
&
therm_throt_en
,
1
);
void
mce_intel_feature_init
(
struct
cpuinfo_x86
*
c
)
{
intel_init_thermal
(
c
);
intel_init_cmci
();
}
arch/x86/kernel/cpu/mcheck/mce_intel_64.c
deleted
100644 → 0
View file @
bc3f5d3d
/*
* Intel specific MCE features.
* Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
* Copyright (C) 2008, 2009 Intel Corporation
* Author: Andi Kleen
*/
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/msr.h>
#include <asm/mce.h>
#include <asm/hw_irq.h>
#include <asm/idle.h>
#include <asm/therm_throt.h>
#include "mce.h"
asmlinkage
void
smp_thermal_interrupt
(
void
)
{
__u64
msr_val
;
ack_APIC_irq
();
exit_idle
();
irq_enter
();
rdmsrl
(
MSR_IA32_THERM_STATUS
,
msr_val
);
if
(
therm_throt_process
(
msr_val
&
THERM_STATUS_PROCHOT
))
mce_log_therm_throt_event
(
msr_val
);
inc_irq_stat
(
irq_thermal_count
);
irq_exit
();
}
/*
* Support for Intel Correct Machine Check Interrupts. This allows
* the CPU to raise an interrupt when a corrected machine check happened.
* Normally we pick those up using a regular polling timer.
* Also supports reliable discovery of shared banks.
*/
static
DEFINE_PER_CPU
(
mce_banks_t
,
mce_banks_owned
);
/*
* cmci_discover_lock protects against parallel discovery attempts
* which could race against each other.
*/
static
DEFINE_SPINLOCK
(
cmci_discover_lock
);
#define CMCI_THRESHOLD 1
static
int
cmci_supported
(
int
*
banks
)
{
u64
cap
;
if
(
mce_cmci_disabled
||
mce_ignore_ce
)
return
0
;
/*
* Vendor check is not strictly needed, but the initial
* initialization is vendor keyed and this
* makes sure none of the backdoors are entered otherwise.
*/
if
(
boot_cpu_data
.
x86_vendor
!=
X86_VENDOR_INTEL
)
return
0
;
if
(
!
cpu_has_apic
||
lapic_get_maxlvt
()
<
6
)
return
0
;
rdmsrl
(
MSR_IA32_MCG_CAP
,
cap
);
*
banks
=
min_t
(
unsigned
,
MAX_NR_BANKS
,
cap
&
0xff
);
return
!!
(
cap
&
MCG_CMCI_P
);
}
/*
* The interrupt handler. This is called on every event.
* Just call the poller directly to log any events.
* This could in theory increase the threshold under high load,
* but doesn't for now.
*/
static
void
intel_threshold_interrupt
(
void
)
{
machine_check_poll
(
MCP_TIMESTAMP
,
&
__get_cpu_var
(
mce_banks_owned
));
mce_notify_irq
();
}
static
void
print_update
(
char
*
type
,
int
*
hdr
,
int
num
)
{
if
(
*
hdr
==
0
)
printk
(
KERN_INFO
"CPU %d MCA banks"
,
smp_processor_id
());
*
hdr
=
1
;
printk
(
KERN_CONT
" %s:%d"
,
type
,
num
);
}
/*
* Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
* on this CPU. Use the algorithm recommended in the SDM to discover shared
* banks.
*/
static
void
cmci_discover
(
int
banks
,
int
boot
)
{
unsigned
long
*
owned
=
(
void
*
)
&
__get_cpu_var
(
mce_banks_owned
);
unsigned
long
flags
;
int
hdr
=
0
;
int
i
;
spin_lock_irqsave
(
&
cmci_discover_lock
,
flags
);
for
(
i
=
0
;
i
<
banks
;
i
++
)
{
u64
val
;
if
(
test_bit
(
i
,
owned
))
continue
;
rdmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
/* Already owned by someone else? */
if
(
val
&
CMCI_EN
)
{
if
(
test_and_clear_bit
(
i
,
owned
)
||
boot
)
print_update
(
"SHD"
,
&
hdr
,
i
);
__clear_bit
(
i
,
__get_cpu_var
(
mce_poll_banks
));
continue
;
}
val
|=
CMCI_EN
|
CMCI_THRESHOLD
;
wrmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
rdmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
/* Did the enable bit stick? -- the bank supports CMCI */
if
(
val
&
CMCI_EN
)
{
if
(
!
test_and_set_bit
(
i
,
owned
)
||
boot
)
print_update
(
"CMCI"
,
&
hdr
,
i
);
__clear_bit
(
i
,
__get_cpu_var
(
mce_poll_banks
));
}
else
{
WARN_ON
(
!
test_bit
(
i
,
__get_cpu_var
(
mce_poll_banks
)));
}
}
spin_unlock_irqrestore
(
&
cmci_discover_lock
,
flags
);
if
(
hdr
)
printk
(
KERN_CONT
"
\n
"
);
}
/*
* Just in case we missed an event during initialization check
* all the CMCI owned banks.
*/
void
cmci_recheck
(
void
)
{
unsigned
long
flags
;
int
banks
;
if
(
!
mce_available
(
&
current_cpu_data
)
||
!
cmci_supported
(
&
banks
))
return
;
local_irq_save
(
flags
);
machine_check_poll
(
MCP_TIMESTAMP
,
&
__get_cpu_var
(
mce_banks_owned
));
local_irq_restore
(
flags
);
}
/*
* Disable CMCI on this CPU for all banks it owns when it goes down.
* This allows other CPUs to claim the banks on rediscovery.
*/
void
cmci_clear
(
void
)
{
unsigned
long
flags
;
int
i
;
int
banks
;
u64
val
;
if
(
!
cmci_supported
(
&
banks
))
return
;
spin_lock_irqsave
(
&
cmci_discover_lock
,
flags
);
for
(
i
=
0
;
i
<
banks
;
i
++
)
{
if
(
!
test_bit
(
i
,
__get_cpu_var
(
mce_banks_owned
)))
continue
;
/* Disable CMCI */
rdmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
val
&=
~
(
CMCI_EN
|
CMCI_THRESHOLD_MASK
);
wrmsrl
(
MSR_IA32_MC0_CTL2
+
i
,
val
);
__clear_bit
(
i
,
__get_cpu_var
(
mce_banks_owned
));
}
spin_unlock_irqrestore
(
&
cmci_discover_lock
,
flags
);
}
/*
* After a CPU went down cycle through all the others and rediscover
* Must run in process context.
*/
void
cmci_rediscover
(
int
dying
)
{
int
banks
;
int
cpu
;
cpumask_var_t
old
;
if
(
!
cmci_supported
(
&
banks
))
return
;
if
(
!
alloc_cpumask_var
(
&
old
,
GFP_KERNEL
))
return
;
cpumask_copy
(
old
,
&
current
->
cpus_allowed
);
for_each_online_cpu
(
cpu
)
{
if
(
cpu
==
dying
)
continue
;
if
(
set_cpus_allowed_ptr
(
current
,
cpumask_of
(
cpu
)))
continue
;
/* Recheck banks in case CPUs don't all have the same */
if
(
cmci_supported
(
&
banks
))
cmci_discover
(
banks
,
0
);
}
set_cpus_allowed_ptr
(
current
,
old
);
free_cpumask_var
(
old
);
}
/*
* Reenable CMCI on this CPU in case a CPU down failed.
*/
void
cmci_reenable
(
void
)
{
int
banks
;
if
(
cmci_supported
(
&
banks
))
cmci_discover
(
banks
,
0
);
}
static
void
intel_init_cmci
(
void
)
{
int
banks
;
if
(
!
cmci_supported
(
&
banks
))
return
;
mce_threshold_vector
=
intel_threshold_interrupt
;
cmci_discover
(
banks
,
1
);
/*
* For CPU #0 this runs with still disabled APIC, but that's
* ok because only the vector is set up. We still do another
* check for the banks later for CPU #0 just to make sure
* to not miss any events.
*/
apic_write
(
APIC_LVTCMCI
,
THRESHOLD_APIC_VECTOR
|
APIC_DM_FIXED
);
cmci_recheck
();
}
void
mce_intel_feature_init
(
struct
cpuinfo_x86
*
c
)
{
intel_init_thermal
(
c
);
intel_init_cmci
();
}
arch/x86/kernel/cpu/mcheck/non-fatal.c
View file @
1d991001
...
...
@@ -17,10 +17,9 @@
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include "mce.h"
static
int
firstbank
;
#define MCE_RATE (15*HZ)
/* timer rate is 15s */
...
...
arch/x86/kernel/cpu/mcheck/p4.c
View file @
1d991001
/*
* P4 specific Machine Check Exception Reporting
*/
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <asm/therm_throt.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include "mce.h"
/* as supported by the P4/Xeon family */
struct
intel_mce_extended_msrs
{
u32
eax
;
...
...
@@ -33,46 +27,6 @@ struct intel_mce_extended_msrs {
static
int
mce_num_extended_msrs
;
#ifdef CONFIG_X86_MCE_P4THERMAL
static
void
unexpected_thermal_interrupt
(
struct
pt_regs
*
regs
)
{
printk
(
KERN_ERR
"CPU%d: Unexpected LVT TMR interrupt!
\n
"
,
smp_processor_id
());
add_taint
(
TAINT_MACHINE_CHECK
);
}
/* P4/Xeon Thermal transition interrupt handler: */
static
void
intel_thermal_interrupt
(
struct
pt_regs
*
regs
)
{
__u64
msr_val
;
ack_APIC_irq
();
rdmsrl
(
MSR_IA32_THERM_STATUS
,
msr_val
);
therm_throt_process
(
msr_val
&
THERM_STATUS_PROCHOT
);
}
/* Thermal interrupt handler for this CPU setup: */
static
void
(
*
vendor_thermal_interrupt
)(
struct
pt_regs
*
regs
)
=
unexpected_thermal_interrupt
;
void
smp_thermal_interrupt
(
struct
pt_regs
*
regs
)
{
irq_enter
();
vendor_thermal_interrupt
(
regs
);
__get_cpu_var
(
irq_stat
).
irq_thermal_count
++
;
irq_exit
();
}
void
intel_set_thermal_handler
(
void
)
{
vendor_thermal_interrupt
=
intel_thermal_interrupt
;
}
#endif
/* CONFIG_X86_MCE_P4THERMAL */
/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
static
void
intel_get_extended_msrs
(
struct
intel_mce_extended_msrs
*
r
)
{
...
...
arch/x86/kernel/cpu/mcheck/p5.c
View file @
1d991001
...
...
@@ -10,12 +10,11 @@
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include "mce.h"
/* By default disabled */
int
mce_p5_enable
;
int
mce_p5_enabled
__read_mostly
;
/* Machine check handler for Pentium class Intel CPUs: */
static
void
pentium_machine_check
(
struct
pt_regs
*
regs
,
long
error_code
)
...
...
@@ -43,15 +42,13 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
{
u32
l
,
h
;
/*
Check for MCE support
: */
if
(
!
cpu_has
(
c
,
X86_FEATURE_MCE
)
)
/*
Default P5 to off as its often misconnected
: */
if
(
!
mce_p5_enabled
)
return
;
#ifdef CONFIG_X86_OLD_MCE
/* Default P5 to off as its often misconnected: */
if
(
mce_disabled
!=
-
1
)
/* Check for MCE support: */
if
(
!
cpu_has
(
c
,
X86_FEATURE_MCE
))
return
;
#endif
machine_check_vector
=
pentium_machine_check
;
/* Make sure the vector pointer is visible before we enable MCEs: */
...
...
arch/x86/kernel/cpu/mcheck/p6.c
View file @
1d991001
...
...
@@ -10,10 +10,9 @@
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include "mce.h"
/* Machine Check Handler For PII/PIII */
static
void
intel_machine_check
(
struct
pt_regs
*
regs
,
long
error_code
)
{
...
...
arch/x86/kernel/cpu/mcheck/therm_throt.c
View file @
1d991001
...
...
@@ -13,13 +13,23 @@
* Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
* Inspired by Ross Biro's and Al Borchers' counter code.
*/
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/sysdev.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <asm/therm_throt.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>
/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL (300 * HZ)
...
...
@@ -27,7 +37,7 @@
static
DEFINE_PER_CPU
(
__u64
,
next_check
)
=
INITIAL_JIFFIES
;
static
DEFINE_PER_CPU
(
unsigned
long
,
thermal_throttle_count
);
atomic_t
therm_throt_en
=
ATOMIC_INIT
(
0
);
static
atomic_t
therm_throt_en
=
ATOMIC_INIT
(
0
);
#ifdef CONFIG_SYSFS
#define define_therm_throt_sysdev_one_ro(_name) \
...
...
@@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = {
* 1 : Event should be logged further, and a message has been
* printed to the syslog.
*/
int
therm_throt_process
(
int
curr
)
static
int
therm_throt_process
(
int
curr
)
{
unsigned
int
cpu
=
smp_processor_id
();
__u64
tmp_jiffs
=
get_jiffies_64
();
...
...
@@ -186,6 +196,94 @@ static __init int thermal_throttle_init_device(void)
return
0
;
}
device_initcall
(
thermal_throttle_init_device
);
#endif
/* CONFIG_SYSFS */
/* Thermal transition interrupt handler */
static
void
intel_thermal_interrupt
(
void
)
{
__u64
msr_val
;
rdmsrl
(
MSR_IA32_THERM_STATUS
,
msr_val
);
if
(
therm_throt_process
(
msr_val
&
THERM_STATUS_PROCHOT
))
mce_log_therm_throt_event
(
msr_val
);
}
static
void
unexpected_thermal_interrupt
(
void
)
{
printk
(
KERN_ERR
"CPU%d: Unexpected LVT TMR interrupt!
\n
"
,
smp_processor_id
());
add_taint
(
TAINT_MACHINE_CHECK
);
}
static
void
(
*
smp_thermal_vector
)(
void
)
=
unexpected_thermal_interrupt
;
asmlinkage
void
smp_thermal_interrupt
(
struct
pt_regs
*
regs
)
{
exit_idle
();
irq_enter
();
inc_irq_stat
(
irq_thermal_count
);
smp_thermal_vector
();
irq_exit
();
/* Ack only at the end to avoid potential reentry */
ack_APIC_irq
();
}
void
intel_init_thermal
(
struct
cpuinfo_x86
*
c
)
{
unsigned
int
cpu
=
smp_processor_id
();
int
tm2
=
0
;
u32
l
,
h
;
/* Thermal monitoring depends on ACPI and clock modulation*/
if
(
!
cpu_has
(
c
,
X86_FEATURE_ACPI
)
||
!
cpu_has
(
c
,
X86_FEATURE_ACC
))
return
;
/*
* First check if its enabled already, in which case there might
* be some SMM goo which handles it, so we can't even put a handler
* since it might be delivered via SMI already:
*/
rdmsr
(
MSR_IA32_MISC_ENABLE
,
l
,
h
);
h
=
apic_read
(
APIC_LVTTHMR
);
if
((
l
&
MSR_IA32_MISC_ENABLE_TM1
)
&&
(
h
&
APIC_DM_SMI
))
{
printk
(
KERN_DEBUG
"CPU%d: Thermal monitoring handled by SMI
\n
"
,
cpu
);
return
;
}
if
(
cpu_has
(
c
,
X86_FEATURE_TM2
)
&&
(
l
&
MSR_IA32_MISC_ENABLE_TM2
))
tm2
=
1
;
/* Check whether a vector already exists */
if
(
h
&
APIC_VECTOR_MASK
)
{
printk
(
KERN_DEBUG
"CPU%d: Thermal LVT vector (%#x) already installed
\n
"
,
cpu
,
(
h
&
APIC_VECTOR_MASK
));
return
;
}
/* We'll mask the thermal vector in the lapic till we're ready: */
h
=
THERMAL_APIC_VECTOR
|
APIC_DM_FIXED
|
APIC_LVT_MASKED
;
apic_write
(
APIC_LVTTHMR
,
h
);
rdmsr
(
MSR_IA32_THERM_INTERRUPT
,
l
,
h
);
wrmsr
(
MSR_IA32_THERM_INTERRUPT
,
l
|
(
THERM_INT_LOW_ENABLE
|
THERM_INT_HIGH_ENABLE
),
h
);
smp_thermal_vector
=
intel_thermal_interrupt
;
rdmsr
(
MSR_IA32_MISC_ENABLE
,
l
,
h
);
wrmsr
(
MSR_IA32_MISC_ENABLE
,
l
|
MSR_IA32_MISC_ENABLE_TM1
,
h
);
/* Unmask the thermal vector: */
l
=
apic_read
(
APIC_LVTTHMR
);
apic_write
(
APIC_LVTTHMR
,
l
&
~
APIC_LVT_MASKED
);
printk
(
KERN_INFO
"CPU%d: Thermal monitoring enabled (%s)
\n
"
,
cpu
,
tm2
?
"TM2"
:
"TM1"
);
/* enable thermal throttle processing */
atomic_set
(
&
therm_throt_en
,
1
);
}
arch/x86/kernel/cpu/mcheck/winchip.c
View file @
1d991001
...
...
@@ -9,10 +9,9 @@
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include "mce.h"
/* Machine check handler for WinChip C6: */
static
void
winchip_machine_check
(
struct
pt_regs
*
regs
,
long
error_code
)
{
...
...
arch/x86/kernel/traps.c
View file @
1d991001
...
...
@@ -53,6 +53,7 @@
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/mce.h>
#include <asm/mach_traps.h>
...
...
@@ -64,8 +65,6 @@
#include <asm/setup.h>
#include <asm/traps.h>
#include "cpu/mcheck/mce.h"
asmlinkage
int
system_call
(
void
);
/* Do we ignore FPU interrupts ? */
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment