Commit b20c99eb authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 RAS changes from Ingo Molnar:
 "[ The reason for drivers/ updates is that Boris asked for the
    drivers/edac/ changes to go via x86/ras in this cycle ]

  Main changes:

   - AMD CPUs:
      . Add ECC event decoding support for new F15h models
      . Various erratum fixes
      . Fix single-channel on dual-channel-controllers bug.

   - Intel CPUs:
      . UC uncorrectable memory error parsing fix
      . Add support for CMC (Corrected Machine Check) 'FF' (Firmware
        First) flag in the APEI HEST

   - Various cleanups and fixes"

* 'x86-ras-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  amd64_edac: Fix incorrect wraparounds
  amd64_edac: Correct erratum 505 range
  cpc925_edac: Use proper array termination
  x86/mce, acpi/apei: Only disable banks listed in HEST if mce is configured
  amd64_edac: Get rid of boot_cpu_data accesses
  amd64_edac: Add ECC decoding support for newer F15h models
  x86, amd_nb: Clarify F15h, model 30h GART and L3 support
  pci_ids: Add PCI device ID functions 3 and 4 for newer F15h models.
  x38_edac: Make a local function static
  i3200_edac: Make a local function static
  x86/mce: Pay no attention to 'F' bit in MCACOD when parsing 'UC' errors
  APEI/ERST: Fix error message formatting
  amd64_edac: Fix single-channel setups
  EDAC: Replace strict_strtol() with kstrtol()
  mce: acpi/apei: Soft-offline a page on firmware GHES notification
  mce: acpi/apei: Add a boot option to disable ff mode for corrected errors
  mce: acpi/apei: Honour Firmware First for MCA banks listed in APEI HEST CMC
parents bb8c4701 ead6fa95
......@@ -176,6 +176,11 @@ ACPI
acpi=noirq Don't route interrupts
acpi=nocmcff Disable firmware first mode for corrected errors. This
disables parsing the HEST CMC error source to check if
firmware has set the FF flag. This may result in
duplicate corrected error reports.
PCI
pci=off Don't use PCI
......
......@@ -86,6 +86,7 @@ extern int acpi_pci_disabled;
extern int acpi_skip_timer_override;
extern int acpi_use_timer_override;
extern int acpi_fix_pin2_polarity;
extern int acpi_disable_cmcff;
extern u8 acpi_sci_flags;
extern int acpi_sci_override_gsi;
......@@ -168,6 +169,7 @@ static inline void arch_acpi_set_pdc_bits(u32 *buf)
#define acpi_lapic 0
#define acpi_ioapic 0
#define acpi_disable_cmcff 0
static inline void acpi_noirq_set(void) { }
static inline void acpi_disable_pci(void) { }
static inline void disable_acpi(void) { }
......
......@@ -32,11 +32,20 @@
#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */
#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */
#define MCI_STATUS_AR (1ULL<<55) /* Action required */
#define MCACOD 0xffff /* MCA Error Code */
/*
* Note that the full MCACOD field of IA32_MCi_STATUS MSR is
* bits 15:0. But bit 12 is the 'F' bit, defined for corrected
* errors to indicate that errors are being filtered by hardware.
* We should mask out bit 12 when looking for specific signatures
* of uncorrected errors - so the F bit is deliberately skipped
* in this #define.
*/
#define MCACOD 0xefff /* MCA Error Code */
/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */
#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */
#define MCACOD_SCRUBMSK 0xfff0
#define MCACOD_SCRUBMSK 0xeff0 /* Skip bit 12 ('F' bit) */
#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */
#define MCACOD_DATA 0x0134 /* Data Load */
#define MCACOD_INSTR 0x0150 /* Instruction Fetch */
......@@ -188,6 +197,9 @@ extern void register_mce_write_callback(ssize_t (*)(struct file *filp,
const char __user *ubuf,
size_t usize, loff_t *off));
/* Disable CMCI/polling for MCA bank claimed by firmware */
extern void mce_disable_bank(int bank);
/*
* Exception handler
*/
......
......@@ -67,6 +67,7 @@ EXPORT_SYMBOL(acpi_pci_disabled);
int acpi_lapic;
int acpi_ioapic;
int acpi_strict;
int acpi_disable_cmcff;
u8 acpi_sci_flags __initdata;
int acpi_sci_override_gsi __initdata;
......@@ -1622,6 +1623,10 @@ static int __init parse_acpi(char *arg)
/* "acpi=copy_dsdt" copys DSDT */
else if (strcmp(arg, "copy_dsdt") == 0) {
acpi_gbl_copy_dsdt_locally = 1;
}
/* "acpi=nocmcff" disables FF mode for corrected errors */
else if (strcmp(arg, "nocmcff") == 0) {
acpi_disable_cmcff = 1;
} else {
/* Core will printk when we return error. */
return -EINVAL;
......
......@@ -20,6 +20,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
{}
};
......@@ -27,6 +28,7 @@ EXPORT_SYMBOL(amd_nb_misc_ids);
static const struct pci_device_id amd_nb_link_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
{}
};
......@@ -81,12 +83,19 @@ int amd_cache_northbridges(void)
next_northbridge(misc, amd_nb_misc_ids);
node_to_amd_nb(i)->link = link =
next_northbridge(link, amd_nb_link_ids);
}
}
/* GART present only on Fam15h upto model 0fh */
if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
boot_cpu_data.x86 == 0x15)
(boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model < 0x10))
amd_northbridges.flags |= AMD_NB_GART;
/*
* Check for L3 cache presence.
*/
if (!cpuid_edx(0x80000006))
return 0;
/*
* Some CPU families support L3 Cache Index Disable. There are some
* limitations because of E382 and E388 on family 0x10.
......
......@@ -25,15 +25,18 @@ int mce_severity(struct mce *a, int tolerant, char **msg);
struct dentry *mce_get_debugfs_dir(void);
extern struct mce_bank *mce_banks;
extern mce_banks_t mce_banks_ce_disabled;
#ifdef CONFIG_X86_MCE_INTEL
unsigned long mce_intel_adjust_timer(unsigned long interval);
void mce_intel_cmci_poll(void);
void mce_intel_hcpu_update(unsigned long cpu);
void cmci_disable_bank(int bank);
#else
# define mce_intel_adjust_timer mce_adjust_timer_default
static inline void mce_intel_cmci_poll(void) { }
static inline void mce_intel_hcpu_update(unsigned long cpu) { }
static inline void cmci_disable_bank(int bank) { }
#endif
void mce_timer_kick(unsigned long interval);
......
......@@ -97,6 +97,15 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
};
/*
* MCA banks controlled through firmware first for corrected errors.
* This is a global list of banks for which we won't enable CMCI and we
* won't poll. Firmware controls these banks and is responsible for
* reporting corrected errors through GHES. Uncorrected/recoverable
* errors are still notified through a machine check.
*/
mce_banks_t mce_banks_ce_disabled;
static DEFINE_PER_CPU(struct work_struct, mce_work);
static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
......@@ -1935,6 +1944,25 @@ static struct miscdevice mce_chrdev_device = {
&mce_chrdev_ops,
};
static void __mce_disable_bank(void *arg)
{
int bank = *((int *)arg);
__clear_bit(bank, __get_cpu_var(mce_poll_banks));
cmci_disable_bank(bank);
}
void mce_disable_bank(int bank)
{
if (bank >= mca_cfg.banks) {
pr_warn(FW_BUG
"Ignoring request to disable invalid MCA bank %d.\n",
bank);
return;
}
set_bit(bank, mce_banks_ce_disabled);
on_each_cpu(__mce_disable_bank, &bank, 1);
}
/*
* mce=off Disables machine check
* mce=no_cmci Disables CMCI
......
......@@ -203,6 +203,10 @@ static void cmci_discover(int banks)
if (test_bit(i, owned))
continue;
/* Skip banks in firmware first mode */
if (test_bit(i, mce_banks_ce_disabled))
continue;
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
/* Already owned by someone else? */
......@@ -271,6 +275,19 @@ void cmci_recheck(void)
local_irq_restore(flags);
}
/* Caller must hold the lock on cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
u64 val;
if (!test_bit(bank, __get_cpu_var(mce_banks_owned)))
return;
rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
val &= ~MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
__clear_bit(bank, __get_cpu_var(mce_banks_owned));
}
/*
* Disable CMCI on this CPU for all banks it owns when it goes down.
* This allows other CPUs to claim the banks on rediscovery.
......@@ -280,20 +297,12 @@ void cmci_clear(void)
unsigned long flags;
int i;
int banks;
u64 val;
if (!cmci_supported(&banks))
return;
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
continue;
/* Disable CMCI */
rdmsrl(MSR_IA32_MCx_CTL2(i), val);
val &= ~MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(i), val);
__clear_bit(i, __get_cpu_var(mce_banks_owned));
}
for (i = 0; i < banks; i++)
__cmci_disable_bank(i);
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
......@@ -327,6 +336,19 @@ void cmci_reenable(void)
cmci_discover(banks);
}
void cmci_disable_bank(int bank)
{
int banks;
unsigned long flags;
if (!cmci_supported(&banks))
return;
raw_spin_lock_irqsave(&cmci_discover_lock, flags);
__cmci_disable_bank(bank);
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
static void intel_init_cmci(void)
{
int banks;
......
......@@ -39,7 +39,8 @@
#include "apei-internal.h"
#define ERST_PFX "ERST: "
#undef pr_fmt
#define pr_fmt(fmt) "ERST: " fmt
/* ERST command status */
#define ERST_STATUS_SUCCESS 0x0
......@@ -109,8 +110,7 @@ static inline int erst_errno(int command_status)
static int erst_timedout(u64 *t, u64 spin_unit)
{
if ((s64)*t < spin_unit) {
pr_warning(FW_WARN ERST_PFX
"Firmware does not respond in time\n");
pr_warn(FW_WARN "Firmware does not respond in time.\n");
return 1;
}
*t -= spin_unit;
......@@ -186,8 +186,8 @@ static int erst_exec_stall(struct apei_exec_context *ctx,
if (ctx->value > FIRMWARE_MAX_STALL) {
if (!in_nmi())
pr_warning(FW_WARN ERST_PFX
"Too long stall time for stall instruction: %llx.\n",
pr_warn(FW_WARN
"Too long stall time for stall instruction: 0x%llx.\n",
ctx->value);
stall_time = FIRMWARE_MAX_STALL;
} else
......@@ -206,8 +206,8 @@ static int erst_exec_stall_while_true(struct apei_exec_context *ctx,
if (ctx->var1 > FIRMWARE_MAX_STALL) {
if (!in_nmi())
pr_warning(FW_WARN ERST_PFX
"Too long stall time for stall while true instruction: %llx.\n",
pr_warn(FW_WARN
"Too long stall time for stall while true instruction: 0x%llx.\n",
ctx->var1);
stall_time = FIRMWARE_MAX_STALL;
} else
......@@ -271,8 +271,7 @@ static int erst_exec_move_data(struct apei_exec_context *ctx,
/* ioremap does not work in interrupt context */
if (in_interrupt()) {
pr_warning(ERST_PFX
"MOVE_DATA can not be used in interrupt context");
pr_warn("MOVE_DATA can not be used in interrupt context.\n");
return -EBUSY;
}
......@@ -524,8 +523,7 @@ static int __erst_record_id_cache_add_one(void)
ERST_RECORD_ID_CACHE_SIZE_MAX);
if (new_size <= erst_record_id_cache.size) {
if (printk_ratelimit())
pr_warning(FW_WARN ERST_PFX
"too many record ID!\n");
pr_warn(FW_WARN "too many record IDs!\n");
return 0;
}
alloc_size = new_size * sizeof(entries[0]);
......@@ -761,8 +759,7 @@ static int __erst_clear_from_storage(u64 record_id)
static void pr_unimpl_nvram(void)
{
if (printk_ratelimit())
pr_warning(ERST_PFX
"NVRAM ERST Log Address Range is not implemented yet\n");
pr_warn("NVRAM ERST Log Address Range not implemented yet.\n");
}
static int __erst_write_to_nvram(const struct cper_record_header *record)
......@@ -1133,7 +1130,7 @@ static int __init erst_init(void)
goto err;
if (erst_disable) {
pr_info(ERST_PFX
pr_info(
"Error Record Serialization Table (ERST) support is disabled.\n");
goto err;
}
......@@ -1144,14 +1141,14 @@ static int __init erst_init(void)
goto err;
else if (ACPI_FAILURE(status)) {
const char *msg = acpi_format_exception(status);
pr_err(ERST_PFX "Failed to get table, %s\n", msg);
pr_err("Failed to get table, %s\n", msg);
rc = -EINVAL;
goto err;
}
rc = erst_check_table(erst_tab);
if (rc) {
pr_err(FW_BUG ERST_PFX "ERST table is invalid\n");
pr_err(FW_BUG "ERST table is invalid.\n");
goto err;
}
......@@ -1169,21 +1166,19 @@ static int __init erst_init(void)
rc = erst_get_erange(&erst_erange);
if (rc) {
if (rc == -ENODEV)
pr_info(ERST_PFX
pr_info(
"The corresponding hardware device or firmware implementation "
"is not available.\n");
else
pr_err(ERST_PFX
"Failed to get Error Log Address Range.\n");
pr_err("Failed to get Error Log Address Range.\n");
goto err_unmap_reg;
}
r = request_mem_region(erst_erange.base, erst_erange.size, "APEI ERST");
if (!r) {
pr_err(ERST_PFX
"Can not request iomem region <0x%16llx-0x%16llx> for ERST.\n",
(unsigned long long)erst_erange.base,
(unsigned long long)erst_erange.base + erst_erange.size);
pr_err("Can not request [mem %#010llx-%#010llx] for ERST.\n",
(unsigned long long)erst_erange.base,
(unsigned long long)erst_erange.base + erst_erange.size - 1);
rc = -EIO;
goto err_unmap_reg;
}
......@@ -1193,7 +1188,7 @@ static int __init erst_init(void)
if (!erst_erange.vaddr)
goto err_release_erange;
pr_info(ERST_PFX
pr_info(
"Error Record Serialization Table (ERST) support is initialized.\n");
buf = kmalloc(erst_erange.size, GFP_KERNEL);
......@@ -1205,15 +1200,15 @@ static int __init erst_init(void)
rc = pstore_register(&erst_info);
if (rc) {
if (rc != -EPERM)
pr_info(ERST_PFX
"Could not register with persistent store\n");
pr_info(
"Could not register with persistent store.\n");
erst_info.buf = NULL;
erst_info.bufsize = 0;
kfree(buf);
}
} else
pr_err(ERST_PFX
"Failed to allocate %lld bytes for persistent store error log\n",
pr_err(
"Failed to allocate %lld bytes for persistent store error log.\n",
erst_erange.size);
return 0;
......
......@@ -409,6 +409,34 @@ static void ghes_clear_estatus(struct ghes *ghes)
ghes->flags &= ~GHES_TO_CLEAR;
}
static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
{
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
unsigned long pfn;
int sec_sev = ghes_severity(gdata->error_severity);
struct cper_sec_mem_err *mem_err;
mem_err = (struct cper_sec_mem_err *)(gdata + 1);
if (sec_sev == GHES_SEV_CORRECTED &&
(gdata->flags & CPER_SEC_ERROR_THRESHOLD_EXCEEDED) &&
(mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS)) {
pfn = mem_err->physical_addr >> PAGE_SHIFT;
if (pfn_valid(pfn))
memory_failure_queue(pfn, 0, MF_SOFT_OFFLINE);
else if (printk_ratelimit())
pr_warn(FW_WARN GHES_PFX
"Invalid address in generic error data: %#llx\n",
mem_err->physical_addr);
}
if (sev == GHES_SEV_RECOVERABLE &&
sec_sev == GHES_SEV_RECOVERABLE &&
mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
pfn = mem_err->physical_addr >> PAGE_SHIFT;
memory_failure_queue(pfn, 0, 0);
}
#endif
}
static void ghes_do_proc(struct ghes *ghes,
const struct acpi_hest_generic_status *estatus)
{
......@@ -428,15 +456,7 @@ static void ghes_do_proc(struct ghes *ghes,
apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED,
mem_err);
#endif
#ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
if (sev == GHES_SEV_RECOVERABLE &&
sec_sev == GHES_SEV_RECOVERABLE &&
mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) {
unsigned long pfn;
pfn = mem_err->physical_addr >> PAGE_SHIFT;
memory_failure_queue(pfn, 0, 0);
}
#endif
ghes_handle_memory_failure(gdata, sev);
}
#ifdef CONFIG_ACPI_APEI_PCIEAER
else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
......
......@@ -36,6 +36,7 @@
#include <linux/io.h>
#include <linux/platform_device.h>
#include <acpi/apei.h>
#include <asm/mce.h>
#include "apei-internal.h"
......@@ -121,6 +122,41 @@ int apei_hest_parse(apei_hest_func_t func, void *data)
}
EXPORT_SYMBOL_GPL(apei_hest_parse);
/*
* Check if firmware advertises firmware first mode. We need FF bit to be set
* along with a set of MC banks which work in FF mode.
*/
static int __init hest_parse_cmc(struct acpi_hest_header *hest_hdr, void *data)
{
#ifdef CONFIG_X86_MCE
int i;
struct acpi_hest_ia_corrected *cmc;
struct acpi_hest_ia_error_bank *mc_bank;
if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK)
return 0;
cmc = (struct acpi_hest_ia_corrected *)hest_hdr;
if (!cmc->enabled)
return 0;
/*
* We expect HEST to provide a list of MC banks that report errors
* in firmware first mode. Otherwise, return non-zero value to
* indicate that we are done parsing HEST.
*/
if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || !cmc->num_hardware_banks)
return 1;
pr_info(HEST_PFX "Enabling Firmware First mode for corrected errors.\n");
mc_bank = (struct acpi_hest_ia_error_bank *)(cmc + 1);
for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++)
mce_disable_bank(mc_bank->bank_number);
#endif
return 1;
}
struct ghes_arr {
struct platform_device **ghes_devs;
unsigned int count;
......@@ -227,6 +263,9 @@ void __init acpi_hest_init(void)
goto err;
}
if (!acpi_disable_cmcff)
apei_hest_parse(hest_parse_cmc, NULL);
if (!ghes_disable) {
rc = apei_hest_parse(hest_parse_ghes_count, &ghes_count);
if (rc)
......
This diff is collapsed.
......@@ -170,6 +170,8 @@
/*
* PCI-defined configuration space registers
*/
#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F1 0x141b
#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F2 0x141c
#define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601
#define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602
#define PCI_DEVICE_ID_AMD_16H_NB_F1 0x1531
......@@ -181,13 +183,22 @@
#define DRAM_BASE_LO 0x40
#define DRAM_LIMIT_LO 0x44
#define dram_intlv_en(pvt, i) ((u8)((pvt->ranges[i].base.lo >> 8) & 0x7))
/*
* F15 M30h D18F1x2[1C:00]
*/
#define DRAM_CONT_BASE 0x200
#define DRAM_CONT_LIMIT 0x204
/*
* F15 M30h D18F1x2[4C:40]
*/
#define DRAM_CONT_HIGH_OFF 0x240
#define dram_rw(pvt, i) ((u8)(pvt->ranges[i].base.lo & 0x3))
#define dram_intlv_sel(pvt, i) ((u8)((pvt->ranges[i].lim.lo >> 8) & 0x7))
#define dram_dst_node(pvt, i) ((u8)(pvt->ranges[i].lim.lo & 0x7))
#define DHAR 0xf0
#define dhar_valid(pvt) ((pvt)->dhar & BIT(0))
#define dhar_mem_hoist_valid(pvt) ((pvt)->dhar & BIT(1))
#define dhar_base(pvt) ((pvt)->dhar & 0xff000000)
#define k8_dhar_offset(pvt) (((pvt)->dhar & 0x0000ff00) << 16)
......@@ -234,8 +245,6 @@
#define DDR3_MODE BIT(8)
#define DCT_SEL_LO 0x110
#define dct_sel_baseaddr(pvt) ((pvt)->dct_sel_lo & 0xFFFFF800)
#define dct_sel_interleave_addr(pvt) (((pvt)->dct_sel_lo >> 6) & 0x3)
#define dct_high_range_enabled(pvt) ((pvt)->dct_sel_lo & BIT(0))
#define dct_interleave_enabled(pvt) ((pvt)->dct_sel_lo & BIT(2))
......@@ -297,6 +306,7 @@ enum amd_families {
K8_CPUS = 0,
F10_CPUS,
F15_CPUS,
F15_M30H_CPUS,
F16_CPUS,
NUM_FAMILIES,
};
......@@ -337,6 +347,10 @@ struct amd64_pvt {
struct pci_dev *F1, *F2, *F3;
u16 mc_node_id; /* MC index of this MC node */
u8 fam; /* CPU family */
u8 model; /* ... model */
u8 stepping; /* ... stepping */
int ext_model; /* extended model value of this node */
int channel_count;
......@@ -414,6 +428,14 @@ static inline u16 extract_syndrome(u64 status)
return ((status >> 47) & 0xff) | ((status >> 16) & 0xff00);
}
static inline u8 dct_sel_interleave_addr(struct amd64_pvt *pvt)
{
if (pvt->fam == 0x15 && pvt->model >= 0x30)
return (((pvt->dct_sel_hi >> 9) & 0x1) << 2) |
((pvt->dct_sel_lo >> 6) & 0x3);
return ((pvt)->dct_sel_lo >> 6) & 0x3;
}
/*
* per-node ECC settings descriptor
*/
......@@ -504,3 +526,33 @@ static inline void enable_caches(void *dummy)
{
write_cr0(read_cr0() & ~X86_CR0_CD);
}
static inline u8 dram_intlv_en(struct amd64_pvt *pvt, unsigned int i)
{
if (pvt->fam == 0x15 && pvt->model >= 0x30) {
u32 tmp;
amd64_read_pci_cfg(pvt->F1, DRAM_CONT_LIMIT, &tmp);
return (u8) tmp & 0xF;
}
return (u8) (pvt->ranges[i].base.lo >> 8) & 0x7;
}
static inline u8 dhar_valid(struct amd64_pvt *pvt)
{
if (pvt->fam == 0x15 && pvt->model >= 0x30) {
u32 tmp;
amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &tmp);
return (tmp >> 1) & BIT(0);
}
return (pvt)->dhar & BIT(0);
}
static inline u32 dct_sel_baseaddr(struct amd64_pvt *pvt)
{
if (pvt->fam == 0x15 && pvt->model >= 0x30) {
u32 tmp;
amd64_read_pci_cfg(pvt->F1, DRAM_CONT_BASE, &tmp);
return (tmp >> 11) & 0x1FFF;
}
return (pvt)->dct_sel_lo & 0xFFFFF800;
}
......@@ -789,7 +789,7 @@ static struct cpc925_dev_info cpc925_devs[] = {
.exit = cpc925_htlink_exit,
.check = cpc925_htlink_check,
},
{0}, /* Terminated by NULL */
{ }
};
/*
......
......@@ -58,8 +58,10 @@ static int edac_set_poll_msec(const char *val, struct kernel_param *kp)
if (!val)
return -EINVAL;
ret = strict_strtol(val, 0, &l);
if (ret == -EINVAL || ((int)l != l))
ret = kstrtol(val, 0, &l);
if (ret)
return ret;
if ((int)l != l)
return -EINVAL;
*((int *)kp->arg) = l;
......
......@@ -260,8 +260,7 @@ static void i3200_check(struct mem_ctl_info *mci)
i3200_process_error_info(mci, &info);
}
void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
static void __iomem *i3200_map_mchbar(struct pci_dev *pdev)
{
union {
u64 mchbar;
......
......@@ -248,8 +248,7 @@ static void x38_check(struct mem_ctl_info *mci)
x38_process_error_info(mci, &info);
}
void __iomem *x38_map_mchbar(struct pci_dev *pdev)
static void __iomem *x38_map_mchbar(struct pci_dev *pdev)
{
union {
u64 mchbar;
......
......@@ -1798,6 +1798,7 @@ enum mf_flags {
MF_COUNT_INCREASED = 1 << 0,
MF_ACTION_REQUIRED = 1 << 1,
MF_MUST_KILL = 1 << 2,
MF_SOFT_OFFLINE = 1 << 3,
};
extern int memory_failure(unsigned long pfn, int trapno, int flags);
extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
......
......@@ -518,6 +518,8 @@
#define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303
#define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304
#define PCI_DEVICE_ID_AMD_15H_M10H_F3 0x1403
#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F3 0x141d
#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F4 0x141e
#define PCI_DEVICE_ID_AMD_15H_NB_F0 0x1600
#define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601
#define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602
......
......@@ -1286,7 +1286,10 @@ static void memory_failure_work_func(struct work_struct *work)
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
if (!gotten)
break;
memory_failure(entry.pfn, entry.trapno, entry.flags);
if (entry.flags & MF_SOFT_OFFLINE)
soft_offline_page(pfn_to_page(entry.pfn), entry.flags);
else
memory_failure(entry.pfn, entry.trapno, entry.flags);
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment