Commit 1b37b8c4 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'edac_for_5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp

Pull EDAC updates from Borislav Petkov:

 - A new EDAC AST 2500 SoC driver (Stefan M Schaeckeler)

 - New i10nm EDAC driver for Intel 10nm CPUs (Qiuxu Zhuo and Tony Luck)

 - Altera SDRAM functionality carveout for separate enablement of RAS
   and SDRAM capabilities on some Altera chips. (Thor Thayer)

 - The usual round of cleanups and fixes

And last but not least: recruit James Morse as a reviewer for the ARM
side.

* tag 'edac_for_5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
  EDAC/altera: Add separate SDRAM EDAC config
  EDAC, altera: Add missing of_node_put()
  EDAC, skx_common: Add code to recognise new compound error code
  EDAC, i10nm: Fix randconfig builds
  EDAC, i10nm: Add a driver for Intel 10nm server processors
  EDAC, skx_edac: Delete duplicated code
  EDAC, skx_common: Separate common code out from skx_edac
  EDAC: Do not check return value of debugfs_create() functions
  EDAC: Add James Morse as a reviewer
  dt-bindings, EDAC: Add Aspeed AST2500
  EDAC, aspeed: Add an Aspeed AST2500 EDAC driver
parents c6400e5c 580b5cf5
Aspeed AST2500 SoC EDAC node
The Aspeed AST2500 SoC supports DDR3 and DDR4 memory with and without ECC (error
correction check).
The memory controller supports SECDED (single bit error correction, double bit
error detection) and single bit error auto scrubbing by reserving 8 bits for
every 64 bit word (effectively reducing available memory to 8/9).
Note, the bootloader must configure ECC mode in the memory controller.
Required properties:
- compatible: should be "aspeed,ast2500-sdram-edac"
- reg: sdram controller register set should be <0x1e6e0000 0x174>
- interrupts: should be AVIC interrupt #0
Example:
edac: sdram@1e6e0000 {
compatible = "aspeed,ast2500-sdram-edac";
reg = <0x1e6e0000 0x174>;
interrupts = <0>;
};
...@@ -5503,6 +5503,12 @@ L: linux-edac@vger.kernel.org ...@@ -5503,6 +5503,12 @@ L: linux-edac@vger.kernel.org
S: Maintained S: Maintained
F: drivers/edac/amd64_edac* F: drivers/edac/amd64_edac*
EDAC-AST2500
M: Stefan Schaeckeler <sschaeck@cisco.com>
S: Supported
F: drivers/edac/aspeed_edac.c
F: Documentation/devicetree/bindings/edac/aspeed-sdram-edac.txt
EDAC-CALXEDA EDAC-CALXEDA
M: Robert Richter <rric@kernel.org> M: Robert Richter <rric@kernel.org>
L: linux-edac@vger.kernel.org L: linux-edac@vger.kernel.org
...@@ -5527,6 +5533,7 @@ F: drivers/edac/thunderx_edac* ...@@ -5527,6 +5533,7 @@ F: drivers/edac/thunderx_edac*
EDAC-CORE EDAC-CORE
M: Borislav Petkov <bp@alien8.de> M: Borislav Petkov <bp@alien8.de>
M: Mauro Carvalho Chehab <mchehab@kernel.org> M: Mauro Carvalho Chehab <mchehab@kernel.org>
R: James Morse <james.morse@arm.com>
L: linux-edac@vger.kernel.org L: linux-edac@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git for-next T: git git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git for-next
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac.git linux_next T: git git://git.kernel.org/pub/scm/linux/kernel/git/mchehab/linux-edac.git linux_next
......
...@@ -47,6 +47,13 @@ memory@80000000 { ...@@ -47,6 +47,13 @@ memory@80000000 {
reg = <0x80000000 0>; reg = <0x80000000 0>;
}; };
edac: sdram@1e6e0000 {
compatible = "aspeed,ast2500-sdram-edac";
reg = <0x1e6e0000 0x174>;
interrupts = <0>;
status = "disabled";
};
ahb { ahb {
compatible = "simple-bus"; compatible = "simple-bus";
#address-cells = <1>; #address-cells = <1>;
......
...@@ -241,6 +241,18 @@ config EDAC_SKX ...@@ -241,6 +241,18 @@ config EDAC_SKX
system has non-volatile DIMMs you should also manually system has non-volatile DIMMs you should also manually
select CONFIG_ACPI_NFIT. select CONFIG_ACPI_NFIT.
config EDAC_I10NM
tristate "Intel 10nm server Integrated MC"
depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG && ACPI
depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_I10NM can't be y
select DMI
select ACPI_ADXL
help
Support for error detection and correction the Intel
10nm server Integrated Memory Controllers. If your
system has non-volatile DIMMs you should also manually
select CONFIG_ACPI_NFIT.
config EDAC_PND2 config EDAC_PND2
tristate "Intel Pondicherry2" tristate "Intel Pondicherry2"
depends on PCI && X86_64 && X86_MCE_INTEL depends on PCI && X86_64 && X86_MCE_INTEL
...@@ -379,9 +391,17 @@ config EDAC_ALTERA ...@@ -379,9 +391,17 @@ config EDAC_ALTERA
depends on EDAC=y && (ARCH_SOCFPGA || ARCH_STRATIX10) depends on EDAC=y && (ARCH_SOCFPGA || ARCH_STRATIX10)
help help
Support for error detection and correction on the Support for error detection and correction on the
Altera SOCs. This must be selected for SDRAM ECC. Altera SOCs. This is the global enable for the
Note that the preloader must initialize the SDRAM various Altera peripherals.
before loading the kernel.
config EDAC_ALTERA_SDRAM
bool "Altera SDRAM ECC"
depends on EDAC_ALTERA=y
help
Support for error detection and correction on the
Altera SDRAM Memory for Altera SoCs. Note that the
preloader must initialize the SDRAM before loading
the kernel.
config EDAC_ALTERA_L2C config EDAC_ALTERA_L2C
bool "Altera L2 Cache ECC" bool "Altera L2 Cache ECC"
...@@ -475,4 +495,13 @@ config EDAC_QCOM ...@@ -475,4 +495,13 @@ config EDAC_QCOM
For debugging issues having to do with stability and overall system For debugging issues having to do with stability and overall system
health, you should probably say 'Y' here. health, you should probably say 'Y' here.
config EDAC_ASPEED
tristate "Aspeed AST 2500 SoC"
depends on MACH_ASPEED_G5
help
Support for error detection and correction on the Aspeed AST 2500 SoC.
First, ECC must be configured in the bootloader. Then, this driver
will expose error counters via the EDAC kernel framework.
endif # EDAC endif # EDAC
...@@ -30,7 +30,6 @@ obj-$(CONFIG_EDAC_I5400) += i5400_edac.o ...@@ -30,7 +30,6 @@ obj-$(CONFIG_EDAC_I5400) += i5400_edac.o
obj-$(CONFIG_EDAC_I7300) += i7300_edac.o obj-$(CONFIG_EDAC_I7300) += i7300_edac.o
obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o
obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o
obj-$(CONFIG_EDAC_SKX) += skx_edac.o
obj-$(CONFIG_EDAC_PND2) += pnd2_edac.o obj-$(CONFIG_EDAC_PND2) += pnd2_edac.o
obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o
obj-$(CONFIG_EDAC_E752X) += e752x_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o
...@@ -58,6 +57,12 @@ obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac_mod.o ...@@ -58,6 +57,12 @@ obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac_mod.o
layerscape_edac_mod-y := fsl_ddr_edac.o layerscape_edac.o layerscape_edac_mod-y := fsl_ddr_edac.o layerscape_edac.o
obj-$(CONFIG_EDAC_LAYERSCAPE) += layerscape_edac_mod.o obj-$(CONFIG_EDAC_LAYERSCAPE) += layerscape_edac_mod.o
skx_edac-y := skx_common.o skx_base.o
obj-$(CONFIG_EDAC_SKX) += skx_edac.o
i10nm_edac-y := skx_common.o i10nm_base.o
obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o
obj-$(CONFIG_EDAC_MV64X60) += mv64x60_edac.o obj-$(CONFIG_EDAC_MV64X60) += mv64x60_edac.o
obj-$(CONFIG_EDAC_CELL) += cell_edac.o obj-$(CONFIG_EDAC_CELL) += cell_edac.o
obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o
...@@ -78,3 +83,4 @@ obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o ...@@ -78,3 +83,4 @@ obj-$(CONFIG_EDAC_SYNOPSYS) += synopsys_edac.o
obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o obj-$(CONFIG_EDAC_XGENE) += xgene_edac.o
obj-$(CONFIG_EDAC_TI) += ti_edac.o obj-$(CONFIG_EDAC_TI) += ti_edac.o
obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o obj-$(CONFIG_EDAC_QCOM) += qcom_edac.o
obj-$(CONFIG_EDAC_ASPEED) += aspeed_edac.o
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#define EDAC_MOD_STR "altera_edac" #define EDAC_MOD_STR "altera_edac"
#define EDAC_DEVICE "Altera" #define EDAC_DEVICE "Altera"
#ifdef CONFIG_EDAC_ALTERA_SDRAM
static const struct altr_sdram_prv_data c5_data = { static const struct altr_sdram_prv_data c5_data = {
.ecc_ctrl_offset = CV_CTLCFG_OFST, .ecc_ctrl_offset = CV_CTLCFG_OFST,
.ecc_ctl_en_mask = CV_CTLCFG_ECC_AUTO_EN, .ecc_ctl_en_mask = CV_CTLCFG_ECC_AUTO_EN,
...@@ -468,6 +469,39 @@ static int altr_sdram_remove(struct platform_device *pdev) ...@@ -468,6 +469,39 @@ static int altr_sdram_remove(struct platform_device *pdev)
return 0; return 0;
} }
/*
* If you want to suspend, need to disable EDAC by removing it
* from the device tree or defconfig.
*/
#ifdef CONFIG_PM
static int altr_sdram_prepare(struct device *dev)
{
pr_err("Suspend not allowed when EDAC is enabled.\n");
return -EPERM;
}
static const struct dev_pm_ops altr_sdram_pm_ops = {
.prepare = altr_sdram_prepare,
};
#endif
static struct platform_driver altr_sdram_edac_driver = {
.probe = altr_sdram_probe,
.remove = altr_sdram_remove,
.driver = {
.name = "altr_sdram_edac",
#ifdef CONFIG_PM
.pm = &altr_sdram_pm_ops,
#endif
.of_match_table = altr_sdram_ctrl_of_match,
},
};
module_platform_driver(altr_sdram_edac_driver);
#endif /* CONFIG_EDAC_ALTERA_SDRAM */
/**************** Stratix 10 EDAC Memory Controller Functions ************/ /**************** Stratix 10 EDAC Memory Controller Functions ************/
/** /**
...@@ -530,37 +564,6 @@ static const struct regmap_config s10_sdram_regmap_cfg = { ...@@ -530,37 +564,6 @@ static const struct regmap_config s10_sdram_regmap_cfg = {
/************** </Stratix10 EDAC Memory Controller Functions> ***********/ /************** </Stratix10 EDAC Memory Controller Functions> ***********/
/*
* If you want to suspend, need to disable EDAC by removing it
* from the device tree or defconfig.
*/
#ifdef CONFIG_PM
static int altr_sdram_prepare(struct device *dev)
{
pr_err("Suspend not allowed when EDAC is enabled.\n");
return -EPERM;
}
static const struct dev_pm_ops altr_sdram_pm_ops = {
.prepare = altr_sdram_prepare,
};
#endif
static struct platform_driver altr_sdram_edac_driver = {
.probe = altr_sdram_probe,
.remove = altr_sdram_remove,
.driver = {
.name = "altr_sdram_edac",
#ifdef CONFIG_PM
.pm = &altr_sdram_pm_ops,
#endif
.of_match_table = altr_sdram_ctrl_of_match,
},
};
module_platform_driver(altr_sdram_edac_driver);
/************************* EDAC Parent Probe *************************/ /************************* EDAC Parent Probe *************************/
static const struct of_device_id altr_edac_device_of_match[]; static const struct of_device_id altr_edac_device_of_match[];
...@@ -1046,14 +1049,17 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask, ...@@ -1046,14 +1049,17 @@ altr_init_a10_ecc_block(struct device_node *np, u32 irq_mask,
return -ENODEV; return -ENODEV;
} }
if (of_address_to_resource(sysmgr_np, 0, &res)) if (of_address_to_resource(sysmgr_np, 0, &res)) {
of_node_put(sysmgr_np);
return -ENOMEM; return -ENOMEM;
}
/* Need physical address for SMCC call */ /* Need physical address for SMCC call */
base = res.start; base = res.start;
ecc_mgr_map = regmap_init(NULL, NULL, (void *)base, ecc_mgr_map = regmap_init(NULL, NULL, (void *)base,
&s10_sdram_regmap_cfg); &s10_sdram_regmap_cfg);
of_node_put(sysmgr_np);
} }
of_node_put(np_eccmgr); of_node_put(np_eccmgr);
if (IS_ERR(ecc_mgr_map)) { if (IS_ERR(ecc_mgr_map)) {
...@@ -2140,11 +2146,13 @@ static int altr_edac_a10_probe(struct platform_device *pdev) ...@@ -2140,11 +2146,13 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
altr_edac_a10_device_add(edac, child); altr_edac_a10_device_add(edac, child);
#ifdef CONFIG_EDAC_ALTERA_SDRAM
else if ((of_device_is_compatible(child, "altr,sdram-edac-a10")) || else if ((of_device_is_compatible(child, "altr,sdram-edac-a10")) ||
(of_device_is_compatible(child, "altr,sdram-edac-s10"))) (of_device_is_compatible(child, "altr,sdram-edac-s10")))
of_platform_populate(pdev->dev.of_node, of_platform_populate(pdev->dev.of_node,
altr_sdram_ctrl_of_match, altr_sdram_ctrl_of_match,
NULL, &pdev->dev); NULL, &pdev->dev);
#endif
} }
return 0; return 0;
......
// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright 2018, 2019 Cisco Systems
*/
#include <linux/edac.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/stop_machine.h>
#include <linux/io.h>
#include <linux/of_address.h>
#include <linux/regmap.h>
#include "edac_module.h"
#define DRV_NAME "aspeed-edac"
#define ASPEED_MCR_PROT 0x00 /* protection key register */
#define ASPEED_MCR_CONF 0x04 /* configuration register */
#define ASPEED_MCR_INTR_CTRL 0x50 /* interrupt control/status register */
#define ASPEED_MCR_ADDR_UNREC 0x58 /* address of first un-recoverable error */
#define ASPEED_MCR_ADDR_REC 0x5c /* address of last recoverable error */
#define ASPEED_MCR_LAST ASPEED_MCR_ADDR_REC
#define ASPEED_MCR_PROT_PASSWD 0xfc600309
#define ASPEED_MCR_CONF_DRAM_TYPE BIT(4)
#define ASPEED_MCR_CONF_ECC BIT(7)
#define ASPEED_MCR_INTR_CTRL_CLEAR BIT(31)
#define ASPEED_MCR_INTR_CTRL_CNT_REC GENMASK(23, 16)
#define ASPEED_MCR_INTR_CTRL_CNT_UNREC GENMASK(15, 12)
#define ASPEED_MCR_INTR_CTRL_ENABLE (BIT(0) | BIT(1))
static struct regmap *aspeed_regmap;
static int regmap_reg_write(void *context, unsigned int reg, unsigned int val)
{
void __iomem *regs = (void __iomem *)context;
/* enable write to MCR register set */
writel(ASPEED_MCR_PROT_PASSWD, regs + ASPEED_MCR_PROT);
writel(val, regs + reg);
/* disable write to MCR register set */
writel(~ASPEED_MCR_PROT_PASSWD, regs + ASPEED_MCR_PROT);
return 0;
}
static int regmap_reg_read(void *context, unsigned int reg, unsigned int *val)
{
void __iomem *regs = (void __iomem *)context;
*val = readl(regs + reg);
return 0;
}
static bool regmap_is_volatile(struct device *dev, unsigned int reg)
{
switch (reg) {
case ASPEED_MCR_PROT:
case ASPEED_MCR_INTR_CTRL:
case ASPEED_MCR_ADDR_UNREC:
case ASPEED_MCR_ADDR_REC:
return true;
default:
return false;
}
}
static const struct regmap_config aspeed_regmap_config = {
.reg_bits = 32,
.val_bits = 32,
.reg_stride = 4,
.max_register = ASPEED_MCR_LAST,
.reg_write = regmap_reg_write,
.reg_read = regmap_reg_read,
.volatile_reg = regmap_is_volatile,
.fast_io = true,
};
static void count_rec(struct mem_ctl_info *mci, u8 rec_cnt, u32 rec_addr)
{
struct csrow_info *csrow = mci->csrows[0];
u32 page, offset, syndrome;
if (!rec_cnt)
return;
/* report first few errors (if there are) */
/* note: no addresses are recorded */
if (rec_cnt > 1) {
/* page, offset and syndrome are not available */
page = 0;
offset = 0;
syndrome = 0;
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, rec_cnt-1,
page, offset, syndrome, 0, 0, -1,
"address(es) not available", "");
}
/* report last error */
/* note: rec_addr is the last recoverable error addr */
page = rec_addr >> PAGE_SHIFT;
offset = rec_addr & ~PAGE_MASK;
/* syndrome is not available */
syndrome = 0;
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
csrow->first_page + page, offset, syndrome,
0, 0, -1, "", "");
}
static void count_un_rec(struct mem_ctl_info *mci, u8 un_rec_cnt,
u32 un_rec_addr)
{
struct csrow_info *csrow = mci->csrows[0];
u32 page, offset, syndrome;
if (!un_rec_cnt)
return;
/* report 1. error */
/* note: un_rec_addr is the first unrecoverable error addr */
page = un_rec_addr >> PAGE_SHIFT;
offset = un_rec_addr & ~PAGE_MASK;
/* syndrome is not available */
syndrome = 0;
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
csrow->first_page + page, offset, syndrome,
0, 0, -1, "", "");
/* report further errors (if there are) */
/* note: no addresses are recorded */
if (un_rec_cnt > 1) {
/* page, offset and syndrome are not available */
page = 0;
offset = 0;
syndrome = 0;
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, un_rec_cnt-1,
page, offset, syndrome, 0, 0, -1,
"address(es) not available", "");
}
}
static irqreturn_t mcr_isr(int irq, void *arg)
{
struct mem_ctl_info *mci = arg;
u32 rec_addr, un_rec_addr;
u32 reg50, reg5c, reg58;
u8 rec_cnt, un_rec_cnt;
regmap_read(aspeed_regmap, ASPEED_MCR_INTR_CTRL, &reg50);
dev_dbg(mci->pdev, "received edac interrupt w/ mcr register 50: 0x%x\n",
reg50);
/* collect data about recoverable and unrecoverable errors */
rec_cnt = (reg50 & ASPEED_MCR_INTR_CTRL_CNT_REC) >> 16;
un_rec_cnt = (reg50 & ASPEED_MCR_INTR_CTRL_CNT_UNREC) >> 12;
dev_dbg(mci->pdev, "%d recoverable interrupts and %d unrecoverable interrupts\n",
rec_cnt, un_rec_cnt);
regmap_read(aspeed_regmap, ASPEED_MCR_ADDR_UNREC, &reg58);
un_rec_addr = reg58;
regmap_read(aspeed_regmap, ASPEED_MCR_ADDR_REC, &reg5c);
rec_addr = reg5c;
/* clear interrupt flags and error counters: */
regmap_update_bits(aspeed_regmap, ASPEED_MCR_INTR_CTRL,
ASPEED_MCR_INTR_CTRL_CLEAR,
ASPEED_MCR_INTR_CTRL_CLEAR);
regmap_update_bits(aspeed_regmap, ASPEED_MCR_INTR_CTRL,
ASPEED_MCR_INTR_CTRL_CLEAR, 0);
/* process recoverable and unrecoverable errors */
count_rec(mci, rec_cnt, rec_addr);
count_un_rec(mci, un_rec_cnt, un_rec_addr);
if (!rec_cnt && !un_rec_cnt)
dev_dbg(mci->pdev, "received edac interrupt, but did not find any ECC counters\n");
regmap_read(aspeed_regmap, ASPEED_MCR_INTR_CTRL, &reg50);
dev_dbg(mci->pdev, "edac interrupt handled. mcr reg 50 is now: 0x%x\n",
reg50);
return IRQ_HANDLED;
}
static int config_irq(void *ctx, struct platform_device *pdev)
{
int irq;
int rc;
/* register interrupt handler */
irq = platform_get_irq(pdev, 0);
dev_dbg(&pdev->dev, "got irq %d\n", irq);
if (!irq)
return -ENODEV;
rc = devm_request_irq(&pdev->dev, irq, mcr_isr, IRQF_TRIGGER_HIGH,
DRV_NAME, ctx);
if (rc) {
dev_err(&pdev->dev, "unable to request irq %d\n", irq);
return rc;
}
/* enable interrupts */
regmap_update_bits(aspeed_regmap, ASPEED_MCR_INTR_CTRL,
ASPEED_MCR_INTR_CTRL_ENABLE,
ASPEED_MCR_INTR_CTRL_ENABLE);
return 0;
}
static int init_csrows(struct mem_ctl_info *mci)
{
struct csrow_info *csrow = mci->csrows[0];
u32 nr_pages, dram_type;
struct dimm_info *dimm;
struct device_node *np;
struct resource r;
u32 reg04;
int rc;
/* retrieve info about physical memory from device tree */
np = of_find_node_by_path("/memory");
if (!np) {
dev_err(mci->pdev, "dt: missing /memory node\n");
return -ENODEV;
};
rc = of_address_to_resource(np, 0, &r);
of_node_put(np);
if (rc) {
dev_err(mci->pdev, "dt: failed requesting resource for /memory node\n");
return rc;
};
dev_dbg(mci->pdev, "dt: /memory node resources: first page r.start=0x%x, resource_size=0x%x, PAGE_SHIFT macro=0x%x\n",
r.start, resource_size(&r), PAGE_SHIFT);
csrow->first_page = r.start >> PAGE_SHIFT;
nr_pages = resource_size(&r) >> PAGE_SHIFT;
csrow->last_page = csrow->first_page + nr_pages - 1;
regmap_read(aspeed_regmap, ASPEED_MCR_CONF, &reg04);
dram_type = (reg04 & ASPEED_MCR_CONF_DRAM_TYPE) ? MEM_DDR4 : MEM_DDR3;
dimm = csrow->channels[0]->dimm;
dimm->mtype = dram_type;
dimm->edac_mode = EDAC_SECDED;
dimm->nr_pages = nr_pages / csrow->nr_channels;
dev_dbg(mci->pdev, "initialized dimm with first_page=0x%lx and nr_pages=0x%x\n",
csrow->first_page, nr_pages);
return 0;
}
static int aspeed_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct edac_mc_layer layers[2];
struct mem_ctl_info *mci;
struct device_node *np;
struct resource *res;
void __iomem *regs;
u32 reg04;
int rc;
/* setup regmap */
np = dev->of_node;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res)
return -ENOENT;
regs = devm_ioremap_resource(dev, res);
if (IS_ERR(regs))
return PTR_ERR(regs);
aspeed_regmap = devm_regmap_init(dev, NULL, (__force void *)regs,
&aspeed_regmap_config);
if (IS_ERR(aspeed_regmap))
return PTR_ERR(aspeed_regmap);
/* bail out if ECC mode is not configured */
regmap_read(aspeed_regmap, ASPEED_MCR_CONF, &reg04);
if (!(reg04 & ASPEED_MCR_CONF_ECC)) {
dev_err(&pdev->dev, "ECC mode is not configured in u-boot\n");
return -EPERM;
}
edac_op_state = EDAC_OPSTATE_INT;
/* allocate & init EDAC MC data structure */
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
layers[0].size = 1;
layers[0].is_virt_csrow = true;
layers[1].type = EDAC_MC_LAYER_CHANNEL;
layers[1].size = 1;
layers[1].is_virt_csrow = false;
mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 0);
if (!mci)
return -ENOMEM;
mci->pdev = &pdev->dev;
mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4;
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_SECDED;
mci->scrub_cap = SCRUB_FLAG_HW_SRC;
mci->scrub_mode = SCRUB_HW_SRC;
mci->mod_name = DRV_NAME;
mci->ctl_name = "MIC";
mci->dev_name = dev_name(&pdev->dev);
rc = init_csrows(mci);
if (rc) {
dev_err(&pdev->dev, "failed to init csrows\n");
goto probe_exit02;
}
platform_set_drvdata(pdev, mci);
/* register with edac core */
rc = edac_mc_add_mc(mci);
if (rc) {
dev_err(&pdev->dev, "failed to register with EDAC core\n");
goto probe_exit02;
}
/* register interrupt handler and enable interrupts */
rc = config_irq(mci, pdev);
if (rc) {
dev_err(&pdev->dev, "failed setting up irq\n");
goto probe_exit01;
}
return 0;
probe_exit01:
edac_mc_del_mc(&pdev->dev);
probe_exit02:
edac_mc_free(mci);
return rc;
}
static int aspeed_remove(struct platform_device *pdev)
{
struct mem_ctl_info *mci;
/* disable interrupts */
regmap_update_bits(aspeed_regmap, ASPEED_MCR_INTR_CTRL,
ASPEED_MCR_INTR_CTRL_ENABLE, 0);
/* free resources */
mci = edac_mc_del_mc(&pdev->dev);
if (mci)
edac_mc_free(mci);
return 0;
}
static const struct of_device_id aspeed_of_match[] = {
{ .compatible = "aspeed,ast2500-sdram-edac" },
{},
};
static struct platform_driver aspeed_driver = {
.driver = {
.name = DRV_NAME,
.of_match_table = aspeed_of_match
},
.probe = aspeed_probe,
.remove = aspeed_remove
};
static int __init aspeed_init(void)
{
return platform_driver_register(&aspeed_driver);
}
static void __exit aspeed_exit(void)
{
platform_driver_unregister(&aspeed_driver);
}
module_init(aspeed_init);
module_exit(aspeed_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Stefan Schaeckeler <sschaeck@cisco.com>");
MODULE_DESCRIPTION("Aspeed AST2500 EDAC driver");
MODULE_VERSION("1.0");
...@@ -41,14 +41,9 @@ static const struct file_operations debug_fake_inject_fops = { ...@@ -41,14 +41,9 @@ static const struct file_operations debug_fake_inject_fops = {
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
}; };
int __init edac_debugfs_init(void) void __init edac_debugfs_init(void)
{ {
edac_debugfs = debugfs_create_dir("edac", NULL); edac_debugfs = debugfs_create_dir("edac", NULL);
if (IS_ERR(edac_debugfs)) {
edac_debugfs = NULL;
return -ENOMEM;
}
return 0;
} }
void edac_debugfs_exit(void) void edac_debugfs_exit(void)
...@@ -56,50 +51,31 @@ void edac_debugfs_exit(void) ...@@ -56,50 +51,31 @@ void edac_debugfs_exit(void)
debugfs_remove_recursive(edac_debugfs); debugfs_remove_recursive(edac_debugfs);
} }
int edac_create_debugfs_nodes(struct mem_ctl_info *mci) void edac_create_debugfs_nodes(struct mem_ctl_info *mci)
{ {
struct dentry *d, *parent; struct dentry *parent;
char name[80]; char name[80];
int i; int i;
if (!edac_debugfs) parent = debugfs_create_dir(mci->dev.kobj.name, edac_debugfs);
return -ENODEV;
d = debugfs_create_dir(mci->dev.kobj.name, edac_debugfs);
if (!d)
return -ENOMEM;
parent = d;
for (i = 0; i < mci->n_layers; i++) { for (i = 0; i < mci->n_layers; i++) {
sprintf(name, "fake_inject_%s", sprintf(name, "fake_inject_%s",
edac_layer_name[mci->layers[i].type]); edac_layer_name[mci->layers[i].type]);
d = debugfs_create_u8(name, S_IRUGO | S_IWUSR, parent, debugfs_create_u8(name, S_IRUGO | S_IWUSR, parent,
&mci->fake_inject_layer[i]); &mci->fake_inject_layer[i]);
if (!d)
goto nomem;
} }
d = debugfs_create_bool("fake_inject_ue", S_IRUGO | S_IWUSR, parent, debugfs_create_bool("fake_inject_ue", S_IRUGO | S_IWUSR, parent,
&mci->fake_inject_ue); &mci->fake_inject_ue);
if (!d)
goto nomem;
d = debugfs_create_u16("fake_inject_count", S_IRUGO | S_IWUSR, parent, debugfs_create_u16("fake_inject_count", S_IRUGO | S_IWUSR, parent,
&mci->fake_inject_count); &mci->fake_inject_count);
if (!d)
goto nomem;
d = debugfs_create_file("fake_inject", S_IWUSR, parent, debugfs_create_file("fake_inject", S_IWUSR, parent, &mci->dev,
&mci->dev,
&debug_fake_inject_fops); &debug_fake_inject_fops);
if (!d)
goto nomem;
mci->debugfs = parent; mci->debugfs = parent;
return 0;
nomem:
edac_debugfs_remove_recursive(mci->debugfs);
return -ENOMEM;
} }
/* Create a toplevel dir under EDAC's debugfs hierarchy */ /* Create a toplevel dir under EDAC's debugfs hierarchy */
......
...@@ -69,9 +69,9 @@ extern void *edac_align_ptr(void **p, unsigned size, int n_elems); ...@@ -69,9 +69,9 @@ extern void *edac_align_ptr(void **p, unsigned size, int n_elems);
#define edac_debugfs_remove_recursive debugfs_remove_recursive #define edac_debugfs_remove_recursive debugfs_remove_recursive
#define edac_debugfs_remove debugfs_remove #define edac_debugfs_remove debugfs_remove
#ifdef CONFIG_EDAC_DEBUG #ifdef CONFIG_EDAC_DEBUG
int edac_debugfs_init(void); void edac_debugfs_init(void);
void edac_debugfs_exit(void); void edac_debugfs_exit(void);
int edac_create_debugfs_nodes(struct mem_ctl_info *mci); void edac_create_debugfs_nodes(struct mem_ctl_info *mci);
struct dentry *edac_debugfs_create_dir(const char *dirname); struct dentry *edac_debugfs_create_dir(const char *dirname);
struct dentry * struct dentry *
edac_debugfs_create_dir_at(const char *dirname, struct dentry *parent); edac_debugfs_create_dir_at(const char *dirname, struct dentry *parent);
...@@ -83,9 +83,9 @@ edac_debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, u8 ...@@ -83,9 +83,9 @@ edac_debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, u8
struct dentry * struct dentry *
edac_debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, u16 *value); edac_debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent, u16 *value);
#else #else
static inline int edac_debugfs_init(void) { return -ENODEV; } static inline void edac_debugfs_init(void) { }
static inline void edac_debugfs_exit(void) { } static inline void edac_debugfs_exit(void) { }
static inline int edac_create_debugfs_nodes(struct mem_ctl_info *mci) { return 0; } static inline void edac_create_debugfs_nodes(struct mem_ctl_info *mci) { }
static inline struct dentry *edac_debugfs_create_dir(const char *dirname) { return NULL; } static inline struct dentry *edac_debugfs_create_dir(const char *dirname) { return NULL; }
static inline struct dentry * static inline struct dentry *
edac_debugfs_create_dir_at(const char *dirname, struct dentry *parent) { return NULL; } edac_debugfs_create_dir_at(const char *dirname, struct dentry *parent) { return NULL; }
......
// SPDX-License-Identifier: GPL-2.0
/*
* Driver for Intel(R) 10nm server memory controller.
* Copyright (c) 2019, Intel Corporation.
*
*/
#include <linux/kernel.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/mce.h>
#include "edac_module.h"
#include "skx_common.h"
#define I10NM_REVISION "v0.0.3"
#define EDAC_MOD_STR "i10nm_edac"
/* Debug macros */
#define i10nm_printk(level, fmt, arg...) \
edac_printk(level, "i10nm", fmt, ##arg)
#define I10NM_GET_SCK_BAR(d, reg) \
pci_read_config_dword((d)->uracu, 0xd0, &(reg))
#define I10NM_GET_IMC_BAR(d, i, reg) \
pci_read_config_dword((d)->uracu, 0xd8 + (i) * 4, &(reg))
#define I10NM_GET_DIMMMTR(m, i, j) \
(*(u32 *)((m)->mbase + 0x2080c + (i) * 0x4000 + (j) * 4))
#define I10NM_GET_MCDDRTCFG(m, i, j) \
(*(u32 *)((m)->mbase + 0x20970 + (i) * 0x4000 + (j) * 4))
#define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23)
#define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12)
#define I10NM_GET_IMC_MMIO_SIZE(reg) ((GET_BITFIELD(reg, 13, 23) - \
GET_BITFIELD(reg, 0, 10) + 1) << 12)
static struct list_head *i10nm_edac_list;
static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
unsigned int dev, unsigned int fun)
{
struct pci_dev *pdev;
pdev = pci_get_domain_bus_and_slot(dom, bus, PCI_DEVFN(dev, fun));
if (!pdev) {
edac_dbg(2, "No device %02x:%02x.%x\n",
bus, dev, fun);
return NULL;
}
if (unlikely(pci_enable_device(pdev) < 0)) {
edac_dbg(2, "Failed to enable device %02x:%02x.%x\n",
bus, dev, fun);
return NULL;
}
pci_dev_get(pdev);
return pdev;
}
static int i10nm_get_all_munits(void)
{
struct pci_dev *mdev;
void __iomem *mbase;
unsigned long size;
struct skx_dev *d;
int i, j = 0;
u32 reg, off;
u64 base;
list_for_each_entry(d, i10nm_edac_list, list) {
d->util_all = pci_get_dev_wrapper(d->seg, d->bus[1], 29, 1);
if (!d->util_all)
return -ENODEV;
d->uracu = pci_get_dev_wrapper(d->seg, d->bus[0], 0, 1);
if (!d->uracu)
return -ENODEV;
if (I10NM_GET_SCK_BAR(d, reg)) {
i10nm_printk(KERN_ERR, "Failed to socket bar\n");
return -ENODEV;
}
base = I10NM_GET_SCK_MMIO_BASE(reg);
edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
j++, base, reg);
for (i = 0; i < I10NM_NUM_IMC; i++) {
mdev = pci_get_dev_wrapper(d->seg, d->bus[0],
12 + i, 0);
if (i == 0 && !mdev) {
i10nm_printk(KERN_ERR, "No IMC found\n");
return -ENODEV;
}
if (!mdev)
continue;
d->imc[i].mdev = mdev;
if (I10NM_GET_IMC_BAR(d, i, reg)) {
i10nm_printk(KERN_ERR, "Failed to get mc bar\n");
return -ENODEV;
}
off = I10NM_GET_IMC_MMIO_OFFSET(reg);
size = I10NM_GET_IMC_MMIO_SIZE(reg);
edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n",
i, base + off, size, reg);
mbase = ioremap(base + off, size);
if (!mbase) {
i10nm_printk(KERN_ERR, "Failed to ioremap 0x%llx\n",
base + off);
return -ENODEV;
}
d->imc[i].mbase = mbase;
}
}
return 0;
}
static const struct x86_cpu_id i10nm_cpuids[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_TREMONT_X, 0, 0 },
{ }
};
MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
static bool i10nm_check_ecc(struct skx_imc *imc, int chan)
{
u32 mcmtr;
mcmtr = *(u32 *)(imc->mbase + 0x20ef8 + chan * 0x4000);
edac_dbg(1, "ch%d mcmtr reg %x\n", chan, mcmtr);
return !!GET_BITFIELD(mcmtr, 2, 2);
}
static int i10nm_get_dimm_config(struct mem_ctl_info *mci)
{
struct skx_pvt *pvt = mci->pvt_info;
struct skx_imc *imc = pvt->imc;
struct dimm_info *dimm;
u32 mtr, mcddrtcfg;
int i, j, ndimms;
for (i = 0; i < I10NM_NUM_CHANNELS; i++) {
if (!imc->mbase)
continue;
ndimms = 0;
for (j = 0; j < I10NM_NUM_DIMMS; j++) {
dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
mci->n_layers, i, j, 0);
mtr = I10NM_GET_DIMMMTR(imc, i, j);
mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i, j);
edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n",
mtr, mcddrtcfg, imc->mc, i, j);
if (IS_DIMM_PRESENT(mtr))
ndimms += skx_get_dimm_info(mtr, 0, dimm,
imc, i, j);
else if (IS_NVDIMM_PRESENT(mcddrtcfg, j))
ndimms += skx_get_nvdimm_info(dimm, imc, i, j,
EDAC_MOD_STR);
}
if (ndimms && !i10nm_check_ecc(imc, 0)) {
i10nm_printk(KERN_ERR, "ECC is disabled on imc %d\n",
imc->mc);
return -ENODEV;
}
}
return 0;
}
static struct notifier_block i10nm_mce_dec = {
.notifier_call = skx_mce_check_error,
.priority = MCE_PRIO_EDAC,
};
static int __init i10nm_init(void)
{
u8 mc = 0, src_id = 0, node_id = 0;
const struct x86_cpu_id *id;
const char *owner;
struct skx_dev *d;
int rc, i, off[3] = {0xd0, 0xc8, 0xcc};
u64 tolm, tohm;
edac_dbg(2, "\n");
owner = edac_get_owner();
if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
return -EBUSY;
id = x86_match_cpu(i10nm_cpuids);
if (!id)
return -ENODEV;
rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);
if (rc)
return rc;
rc = skx_get_all_bus_mappings(0x3452, 0xcc, I10NM, &i10nm_edac_list);
if (rc < 0)
goto fail;
if (rc == 0) {
i10nm_printk(KERN_ERR, "No memory controllers found\n");
return -ENODEV;
}
rc = i10nm_get_all_munits();
if (rc < 0)
goto fail;
list_for_each_entry(d, i10nm_edac_list, list) {
rc = skx_get_src_id(d, &src_id);
if (rc < 0)
goto fail;
rc = skx_get_node_id(d, &node_id);
if (rc < 0)
goto fail;
edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id);
for (i = 0; i < I10NM_NUM_IMC; i++) {
if (!d->imc[i].mdev)
continue;
d->imc[i].mc = mc++;
d->imc[i].lmc = i;
d->imc[i].src_id = src_id;
d->imc[i].node_id = node_id;
rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
"Intel_10nm Socket", EDAC_MOD_STR,
i10nm_get_dimm_config);
if (rc < 0)
goto fail;
}
}
rc = skx_adxl_get();
if (rc)
goto fail;
opstate_init();
mce_register_decode_chain(&i10nm_mce_dec);
setup_skx_debug("i10nm_test");
i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION);
return 0;
fail:
skx_remove();
return rc;
}
static void __exit i10nm_exit(void)
{
edac_dbg(2, "\n");
teardown_skx_debug();
mce_unregister_decode_chain(&i10nm_mce_dec);
skx_adxl_put();
skx_remove();
}
module_init(i10nm_init);
module_exit(i10nm_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors");
// SPDX-License-Identifier: GPL-2.0
/* /*
* EDAC driver for Intel(R) Xeon(R) Skylake processors * EDAC driver for Intel(R) Xeon(R) Skylake processors
* Copyright (c) 2016, Intel Corporation. * Copyright (c) 2016, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/ */
#include <linux/module.h> #include <linux/kernel.h>
#include <linux/init.h> #include <linux/processor.h>
#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/edac.h>
#include <linux/mmzone.h>
#include <linux/smp.h>
#include <linux/bitmap.h>
#include <linux/math64.h>
#include <linux/mod_devicetable.h>
#include <linux/adxl.h>
#include <acpi/nfit.h>
#include <asm/cpu_device_id.h> #include <asm/cpu_device_id.h>
#include <asm/intel-family.h> #include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/mce.h> #include <asm/mce.h>
#include "edac_module.h" #include "edac_module.h"
#include "skx_common.h"
#define EDAC_MOD_STR "skx_edac" #define EDAC_MOD_STR "skx_edac"
#define MSG_SIZE 1024
/* /*
* Debug macros * Debug macros
...@@ -47,105 +24,20 @@ ...@@ -47,105 +24,20 @@
#define skx_mc_printk(mci, level, fmt, arg...) \ #define skx_mc_printk(mci, level, fmt, arg...) \
edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg) edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)
/* static struct list_head *skx_edac_list;
* Get a bit field at register value <v>, from bit <lo> to bit <hi>
*/
#define GET_BITFIELD(v, lo, hi) \
(((v) & GENMASK_ULL((hi), (lo))) >> (lo))
static LIST_HEAD(skx_edac_list);
static u64 skx_tolm, skx_tohm; static u64 skx_tolm, skx_tohm;
static char *skx_msg; static int skx_num_sockets;
static unsigned int nvdimm_count; static unsigned int nvdimm_count;
enum {
INDEX_SOCKET,
INDEX_MEMCTRL,
INDEX_CHANNEL,
INDEX_DIMM,
INDEX_MAX
};
static const char * const component_names[] = {
[INDEX_SOCKET] = "ProcessorSocketId",
[INDEX_MEMCTRL] = "MemoryControllerId",
[INDEX_CHANNEL] = "ChannelId",
[INDEX_DIMM] = "DimmSlotId",
};
static int component_indices[ARRAY_SIZE(component_names)];
static int adxl_component_count;
static const char * const *adxl_component_names;
static u64 *adxl_values;
static char *adxl_msg;
#define NUM_IMC 2 /* memory controllers per socket */
#define NUM_CHANNELS 3 /* channels per memory controller */
#define NUM_DIMMS 2 /* Max DIMMS per channel */
#define MASK26 0x3FFFFFF /* Mask for 2^26 */ #define MASK26 0x3FFFFFF /* Mask for 2^26 */
#define MASK29 0x1FFFFFFF /* Mask for 2^29 */ #define MASK29 0x1FFFFFFF /* Mask for 2^29 */
/*
* Each cpu socket contains some pci devices that provide global
* information, and also some that are local to each of the two
* memory controllers on the die.
*/
struct skx_dev {
struct list_head list;
u8 bus[4];
int seg;
struct pci_dev *sad_all;
struct pci_dev *util_all;
u32 mcroute;
struct skx_imc {
struct mem_ctl_info *mci;
u8 mc; /* system wide mc# */
u8 lmc; /* socket relative mc# */
u8 src_id, node_id;
struct skx_channel {
struct pci_dev *cdev;
struct skx_dimm {
u8 close_pg;
u8 bank_xor_enable;
u8 fine_grain_bank;
u8 rowbits;
u8 colbits;
} dimms[NUM_DIMMS];
} chan[NUM_CHANNELS];
} imc[NUM_IMC];
};
static int skx_num_sockets;
struct skx_pvt {
struct skx_imc *imc;
};
struct decoded_addr {
struct skx_dev *dev;
u64 addr;
int socket;
int imc;
int channel;
u64 chan_addr;
int sktways;
int chanways;
int dimm;
int rank;
int channel_rank;
u64 rank_address;
int row;
int column;
int bank_address;
int bank_group;
};
static struct skx_dev *get_skx_dev(struct pci_bus *bus, u8 idx) static struct skx_dev *get_skx_dev(struct pci_bus *bus, u8 idx)
{ {
struct skx_dev *d; struct skx_dev *d;
list_for_each_entry(d, &skx_edac_list, list) { list_for_each_entry(d, skx_edac_list, list) {
if (d->seg == pci_domain_nr(bus) && d->bus[idx] == bus->number) if (d->seg == pci_domain_nr(bus) && d->bus[idx] == bus->number)
return d; return d;
} }
...@@ -159,7 +51,7 @@ enum munittype { ...@@ -159,7 +51,7 @@ enum munittype {
struct munit { struct munit {
u16 did; u16 did;
u16 devfn[NUM_IMC]; u16 devfn[SKX_NUM_IMC];
u8 busidx; u8 busidx;
u8 per_socket; u8 per_socket;
enum munittype mtype; enum munittype mtype;
...@@ -180,45 +72,6 @@ static const struct munit skx_all_munits[] = { ...@@ -180,45 +72,6 @@ static const struct munit skx_all_munits[] = {
{ } { }
}; };
/*
* We use the per-socket device 0x2016 to count how many sockets are present,
* and to detemine which PCI buses are associated with each socket. Allocate
* and build the full list of all the skx_dev structures that we need here.
*/
static int get_all_bus_mappings(void)
{
struct pci_dev *pdev, *prev;
struct skx_dev *d;
u32 reg;
int ndev = 0;
prev = NULL;
for (;;) {
pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2016, prev);
if (!pdev)
break;
ndev++;
d = kzalloc(sizeof(*d), GFP_KERNEL);
if (!d) {
pci_dev_put(pdev);
return -ENOMEM;
}
d->seg = pci_domain_nr(pdev->bus);
pci_read_config_dword(pdev, 0xCC, &reg);
d->bus[0] = GET_BITFIELD(reg, 0, 7);
d->bus[1] = GET_BITFIELD(reg, 8, 15);
d->bus[2] = GET_BITFIELD(reg, 16, 23);
d->bus[3] = GET_BITFIELD(reg, 24, 31);
edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x\n",
d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
list_add_tail(&d->list, &skx_edac_list);
skx_num_sockets++;
prev = pdev;
}
return ndev;
}
static int get_all_munits(const struct munit *m) static int get_all_munits(const struct munit *m)
{ {
struct pci_dev *pdev, *prev; struct pci_dev *pdev, *prev;
...@@ -232,11 +85,11 @@ static int get_all_munits(const struct munit *m) ...@@ -232,11 +85,11 @@ static int get_all_munits(const struct munit *m)
if (!pdev) if (!pdev)
break; break;
ndev++; ndev++;
if (m->per_socket == NUM_IMC) { if (m->per_socket == SKX_NUM_IMC) {
for (i = 0; i < NUM_IMC; i++) for (i = 0; i < SKX_NUM_IMC; i++)
if (m->devfn[i] == pdev->devfn) if (m->devfn[i] == pdev->devfn)
break; break;
if (i == NUM_IMC) if (i == SKX_NUM_IMC)
goto fail; goto fail;
} }
d = get_skx_dev(pdev->bus, m->busidx); d = get_skx_dev(pdev->bus, m->busidx);
...@@ -272,11 +125,10 @@ static int get_all_munits(const struct munit *m) ...@@ -272,11 +125,10 @@ static int get_all_munits(const struct munit *m)
*/ */
pci_read_config_dword(pdev, 0xB4, &reg); pci_read_config_dword(pdev, 0xB4, &reg);
if (reg != 0) { if (reg != 0) {
if (d->mcroute == 0) if (d->mcroute == 0) {
d->mcroute = reg; d->mcroute = reg;
else if (d->mcroute != reg) { } else if (d->mcroute != reg) {
skx_printk(KERN_ERR, skx_printk(KERN_ERR, "mcroute mismatch\n");
"mcroute mismatch\n");
goto fail; goto fail;
} }
} }
...@@ -299,169 +151,8 @@ static const struct x86_cpu_id skx_cpuids[] = { ...@@ -299,169 +151,8 @@ static const struct x86_cpu_id skx_cpuids[] = {
}; };
MODULE_DEVICE_TABLE(x86cpu, skx_cpuids); MODULE_DEVICE_TABLE(x86cpu, skx_cpuids);
static u8 get_src_id(struct skx_dev *d)
{
u32 reg;
pci_read_config_dword(d->util_all, 0xF0, &reg);
return GET_BITFIELD(reg, 12, 14);
}
static u8 skx_get_node_id(struct skx_dev *d)
{
u32 reg;
pci_read_config_dword(d->util_all, 0xF4, &reg);
return GET_BITFIELD(reg, 0, 2);
}
static int get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval,
int maxval, char *name)
{
u32 val = GET_BITFIELD(reg, lobit, hibit);
if (val < minval || val > maxval) {
edac_dbg(2, "bad %s = %d (raw=0x%x)\n", name, val, reg);
return -EINVAL;
}
return val + add;
}
#define IS_DIMM_PRESENT(mtr) GET_BITFIELD((mtr), 15, 15)
#define IS_NVDIMM_PRESENT(mcddrtcfg, i) GET_BITFIELD((mcddrtcfg), (i), (i))
#define numrank(reg) get_dimm_attr((reg), 12, 13, 0, 0, 2, "ranks")
#define numrow(reg) get_dimm_attr((reg), 2, 4, 12, 1, 6, "rows")
#define numcol(reg) get_dimm_attr((reg), 0, 1, 10, 0, 2, "cols")
static int get_width(u32 mtr)
{
switch (GET_BITFIELD(mtr, 8, 9)) {
case 0:
return DEV_X4;
case 1:
return DEV_X8;
case 2:
return DEV_X16;
}
return DEV_UNKNOWN;
}
static int skx_get_hi_lo(void)
{
struct pci_dev *pdev;
u32 reg;
pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2034, NULL);
if (!pdev) {
edac_dbg(0, "Can't get tolm/tohm\n");
return -ENODEV;
}
pci_read_config_dword(pdev, 0xD0, &reg);
skx_tolm = reg;
pci_read_config_dword(pdev, 0xD4, &reg);
skx_tohm = reg;
pci_read_config_dword(pdev, 0xD8, &reg);
skx_tohm |= (u64)reg << 32;
pci_dev_put(pdev);
edac_dbg(2, "tolm=0x%llx tohm=0x%llx\n", skx_tolm, skx_tohm);
return 0;
}
static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
struct skx_imc *imc, int chan, int dimmno)
{
int banks = 16, ranks, rows, cols, npages;
u64 size;
ranks = numrank(mtr);
rows = numrow(mtr);
cols = numcol(mtr);
/*
* Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
*/
size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3);
npages = MiB_TO_PAGES(size);
edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: 0x%#x, col: 0x%#x\n",
imc->mc, chan, dimmno, size, npages,
banks, 1 << ranks, rows, cols);
imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mtr, 0, 0);
imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mtr, 9, 9);
imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0);
imc->chan[chan].dimms[dimmno].rowbits = rows;
imc->chan[chan].dimms[dimmno].colbits = cols;
dimm->nr_pages = npages;
dimm->grain = 32;
dimm->dtype = get_width(mtr);
dimm->mtype = MEM_DDR4;
dimm->edac_mode = EDAC_SECDED; /* likely better than this */
snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
imc->src_id, imc->lmc, chan, dimmno);
return 1;
}
static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
int chan, int dimmno)
{
int smbios_handle;
u32 dev_handle;
u16 flags;
u64 size = 0;
nvdimm_count++;
dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
imc->src_id, 0);
smbios_handle = nfit_get_smbios_id(dev_handle, &flags);
if (smbios_handle == -EOPNOTSUPP) {
pr_warn_once(EDAC_MOD_STR ": Can't find size of NVDIMM. Try enabling CONFIG_ACPI_NFIT\n");
goto unknown_size;
}
if (smbios_handle < 0) {
skx_printk(KERN_ERR, "Can't find handle for NVDIMM ADR=0x%x\n", dev_handle);
goto unknown_size;
}
if (flags & ACPI_NFIT_MEM_MAP_FAILED) {
skx_printk(KERN_ERR, "NVDIMM ADR=0x%x is not mapped\n", dev_handle);
goto unknown_size;
}
size = dmi_memdev_size(smbios_handle);
if (size == ~0ull)
skx_printk(KERN_ERR, "Can't find size for NVDIMM ADR=0x%x/SMBIOS=0x%x\n",
dev_handle, smbios_handle);
unknown_size:
dimm->nr_pages = size >> PAGE_SHIFT;
dimm->grain = 32;
dimm->dtype = DEV_UNKNOWN;
dimm->mtype = MEM_NVDIMM;
dimm->edac_mode = EDAC_SECDED; /* likely better than this */
edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n",
imc->mc, chan, dimmno, size >> 20, dimm->nr_pages);
snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
imc->src_id, imc->lmc, chan, dimmno);
return (size == 0 || size == ~0ull) ? 0 : 1;
}
#define SKX_GET_MTMTR(dev, reg) \ #define SKX_GET_MTMTR(dev, reg) \
pci_read_config_dword((dev), 0x87c, &reg) pci_read_config_dword((dev), 0x87c, &(reg))
static bool skx_check_ecc(struct pci_dev *pdev) static bool skx_check_ecc(struct pci_dev *pdev)
{ {
...@@ -481,19 +172,22 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci) ...@@ -481,19 +172,22 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci)
int i, j; int i, j;
int ndimms; int ndimms;
for (i = 0; i < NUM_CHANNELS; i++) { for (i = 0; i < SKX_NUM_CHANNELS; i++) {
ndimms = 0; ndimms = 0;
pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap); pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap);
pci_read_config_dword(imc->chan[i].cdev, 0x400, &mcddrtcfg); pci_read_config_dword(imc->chan[i].cdev, 0x400, &mcddrtcfg);
for (j = 0; j < NUM_DIMMS; j++) { for (j = 0; j < SKX_NUM_DIMMS; j++) {
dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
mci->n_layers, i, j, 0); mci->n_layers, i, j, 0);
pci_read_config_dword(imc->chan[i].cdev, pci_read_config_dword(imc->chan[i].cdev,
0x80 + 4*j, &mtr); 0x80 + 4 * j, &mtr);
if (IS_DIMM_PRESENT(mtr)) if (IS_DIMM_PRESENT(mtr)) {
ndimms += get_dimm_info(mtr, amap, dimm, imc, i, j); ndimms += skx_get_dimm_info(mtr, amap, dimm, imc, i, j);
else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) } else if (IS_NVDIMM_PRESENT(mcddrtcfg, j)) {
ndimms += get_nvdimm_info(dimm, imc, i, j); ndimms += skx_get_nvdimm_info(dimm, imc, i, j,
EDAC_MOD_STR);
nvdimm_count++;
}
} }
if (ndimms && !skx_check_ecc(imc->chan[0].cdev)) { if (ndimms && !skx_check_ecc(imc->chan[0].cdev)) {
skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc); skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc);
...@@ -504,95 +198,12 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci) ...@@ -504,95 +198,12 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci)
return 0; return 0;
} }
static void skx_unregister_mci(struct skx_imc *imc)
{
struct mem_ctl_info *mci = imc->mci;
if (!mci)
return;
edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci);
/* Remove MC sysfs nodes */
edac_mc_del_mc(mci->pdev);
edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
kfree(mci->ctl_name);
edac_mc_free(mci);
}
static int skx_register_mci(struct skx_imc *imc)
{
struct mem_ctl_info *mci;
struct edac_mc_layer layers[2];
struct pci_dev *pdev = imc->chan[0].cdev;
struct skx_pvt *pvt;
int rc;
/* allocate a new MC control structure */
layers[0].type = EDAC_MC_LAYER_CHANNEL;
layers[0].size = NUM_CHANNELS;
layers[0].is_virt_csrow = false;
layers[1].type = EDAC_MC_LAYER_SLOT;
layers[1].size = NUM_DIMMS;
layers[1].is_virt_csrow = true;
mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
sizeof(struct skx_pvt));
if (unlikely(!mci))
return -ENOMEM;
edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci);
/* Associate skx_dev and mci for future usage */
imc->mci = mci;
pvt = mci->pvt_info;
pvt->imc = imc;
mci->ctl_name = kasprintf(GFP_KERNEL, "Skylake Socket#%d IMC#%d",
imc->node_id, imc->lmc);
if (!mci->ctl_name) {
rc = -ENOMEM;
goto fail0;
}
mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_NVDIMM;
mci->edac_ctl_cap = EDAC_FLAG_NONE;
mci->edac_cap = EDAC_FLAG_NONE;
mci->mod_name = EDAC_MOD_STR;
mci->dev_name = pci_name(imc->chan[0].cdev);
mci->ctl_page_to_phys = NULL;
rc = skx_get_dimm_config(mci);
if (rc < 0)
goto fail;
/* record ptr to the generic device */
mci->pdev = &pdev->dev;
/* add this new MC control structure to EDAC's list of MCs */
if (unlikely(edac_mc_add_mc(mci))) {
edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
rc = -EINVAL;
goto fail;
}
return 0;
fail:
kfree(mci->ctl_name);
fail0:
edac_mc_free(mci);
imc->mci = NULL;
return rc;
}
#define SKX_MAX_SAD 24 #define SKX_MAX_SAD 24
#define SKX_GET_SAD(d, i, reg) \ #define SKX_GET_SAD(d, i, reg) \
pci_read_config_dword((d)->sad_all, 0x60 + 8 * (i), &reg) pci_read_config_dword((d)->sad_all, 0x60 + 8 * (i), &(reg))
#define SKX_GET_ILV(d, i, reg) \ #define SKX_GET_ILV(d, i, reg) \
pci_read_config_dword((d)->sad_all, 0x64 + 8 * (i), &reg) pci_read_config_dword((d)->sad_all, 0x64 + 8 * (i), &(reg))
#define SKX_SAD_MOD3MODE(sad) GET_BITFIELD((sad), 30, 31) #define SKX_SAD_MOD3MODE(sad) GET_BITFIELD((sad), 30, 31)
#define SKX_SAD_MOD3(sad) GET_BITFIELD((sad), 27, 27) #define SKX_SAD_MOD3(sad) GET_BITFIELD((sad), 27, 27)
...@@ -607,7 +218,7 @@ static int skx_register_mci(struct skx_imc *imc) ...@@ -607,7 +218,7 @@ static int skx_register_mci(struct skx_imc *imc)
static bool skx_sad_decode(struct decoded_addr *res) static bool skx_sad_decode(struct decoded_addr *res)
{ {
struct skx_dev *d = list_first_entry(&skx_edac_list, typeof(*d), list); struct skx_dev *d = list_first_entry(skx_edac_list, typeof(*d), list);
u64 addr = res->addr; u64 addr = res->addr;
int i, idx, tgt, lchan, shift; int i, idx, tgt, lchan, shift;
u32 sad, ilv; u32 sad, ilv;
...@@ -661,7 +272,7 @@ static bool skx_sad_decode(struct decoded_addr *res) ...@@ -661,7 +272,7 @@ static bool skx_sad_decode(struct decoded_addr *res)
return false; return false;
} }
remote = 1; remote = 1;
list_for_each_entry(d, &skx_edac_list, list) { list_for_each_entry(d, skx_edac_list, list) {
if (d->imc[0].src_id == SKX_ILV_TARGET(tgt)) if (d->imc[0].src_id == SKX_ILV_TARGET(tgt))
goto restart; goto restart;
} }
...@@ -669,9 +280,9 @@ static bool skx_sad_decode(struct decoded_addr *res) ...@@ -669,9 +280,9 @@ static bool skx_sad_decode(struct decoded_addr *res)
return false; return false;
} }
if (SKX_SAD_MOD3(sad) == 0) if (SKX_SAD_MOD3(sad) == 0) {
lchan = SKX_ILV_TARGET(tgt); lchan = SKX_ILV_TARGET(tgt);
else { } else {
switch (SKX_SAD_MOD3MODE(sad)) { switch (SKX_SAD_MOD3MODE(sad)) {
case 0: case 0:
shift = 6; shift = 6;
...@@ -717,11 +328,11 @@ static bool skx_sad_decode(struct decoded_addr *res) ...@@ -717,11 +328,11 @@ static bool skx_sad_decode(struct decoded_addr *res)
#define SKX_MAX_TAD 8 #define SKX_MAX_TAD 8
#define SKX_GET_TADBASE(d, mc, i, reg) \ #define SKX_GET_TADBASE(d, mc, i, reg) \
pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x850 + 4 * (i), &reg) pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x850 + 4 * (i), &(reg))
#define SKX_GET_TADWAYNESS(d, mc, i, reg) \ #define SKX_GET_TADWAYNESS(d, mc, i, reg) \
pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x880 + 4 * (i), &reg) pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x880 + 4 * (i), &(reg))
#define SKX_GET_TADCHNILVOFFSET(d, mc, ch, i, reg) \ #define SKX_GET_TADCHNILVOFFSET(d, mc, ch, i, reg) \
pci_read_config_dword((d)->imc[mc].chan[ch].cdev, 0x90 + 4 * (i), &reg) pci_read_config_dword((d)->imc[mc].chan[ch].cdev, 0x90 + 4 * (i), &(reg))
#define SKX_TAD_BASE(b) ((u64)GET_BITFIELD((b), 12, 31) << 26) #define SKX_TAD_BASE(b) ((u64)GET_BITFIELD((b), 12, 31) << 26)
#define SKX_TAD_SKT_GRAN(b) GET_BITFIELD((b), 4, 5) #define SKX_TAD_SKT_GRAN(b) GET_BITFIELD((b), 4, 5)
...@@ -793,10 +404,10 @@ static bool skx_tad_decode(struct decoded_addr *res) ...@@ -793,10 +404,10 @@ static bool skx_tad_decode(struct decoded_addr *res)
#define SKX_GET_RIRWAYNESS(d, mc, ch, i, reg) \ #define SKX_GET_RIRWAYNESS(d, mc, ch, i, reg) \
pci_read_config_dword((d)->imc[mc].chan[ch].cdev, \ pci_read_config_dword((d)->imc[mc].chan[ch].cdev, \
0x108 + 4 * (i), &reg) 0x108 + 4 * (i), &(reg))
#define SKX_GET_RIRILV(d, mc, ch, idx, i, reg) \ #define SKX_GET_RIRILV(d, mc, ch, idx, i, reg) \
pci_read_config_dword((d)->imc[mc].chan[ch].cdev, \ pci_read_config_dword((d)->imc[mc].chan[ch].cdev, \
0x120 + 16 * idx + 4 * (i), &reg) 0x120 + 16 * (idx) + 4 * (i), &(reg))
#define SKX_RIR_VALID(b) GET_BITFIELD((b), 31, 31) #define SKX_RIR_VALID(b) GET_BITFIELD((b), 31, 31)
#define SKX_RIR_LIMIT(b) (((u64)GET_BITFIELD((b), 1, 11) << 29) | MASK29) #define SKX_RIR_LIMIT(b) (((u64)GET_BITFIELD((b), 1, 11) << 29) | MASK29)
...@@ -854,15 +465,19 @@ static bool skx_rir_decode(struct decoded_addr *res) ...@@ -854,15 +465,19 @@ static bool skx_rir_decode(struct decoded_addr *res)
static u8 skx_close_row[] = { static u8 skx_close_row[] = {
15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33
}; };
static u8 skx_close_column[] = { static u8 skx_close_column[] = {
3, 4, 5, 14, 19, 23, 24, 25, 26, 27 3, 4, 5, 14, 19, 23, 24, 25, 26, 27
}; };
static u8 skx_open_row[] = { static u8 skx_open_row[] = {
14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33
}; };
static u8 skx_open_column[] = { static u8 skx_open_column[] = {
3, 4, 5, 6, 7, 8, 9, 10, 11, 12 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
}; };
static u8 skx_open_fine_column[] = { static u8 skx_open_fine_column[] = {
3, 4, 5, 7, 8, 9, 10, 11, 12, 13 3, 4, 5, 7, 8, 9, 10, 11, 12, 13
}; };
...@@ -916,340 +531,15 @@ static bool skx_mad_decode(struct decoded_addr *r) ...@@ -916,340 +531,15 @@ static bool skx_mad_decode(struct decoded_addr *r)
static bool skx_decode(struct decoded_addr *res) static bool skx_decode(struct decoded_addr *res)
{ {
return skx_sad_decode(res) && skx_tad_decode(res) && return skx_sad_decode(res) && skx_tad_decode(res) &&
skx_rir_decode(res) && skx_mad_decode(res); skx_rir_decode(res) && skx_mad_decode(res);
} }
static bool skx_adxl_decode(struct decoded_addr *res)
{
int i, len = 0;
if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
res->addr < BIT_ULL(32))) {
edac_dbg(0, "Address 0x%llx out of range\n", res->addr);
return false;
}
if (adxl_decode(res->addr, adxl_values)) {
edac_dbg(0, "Failed to decode 0x%llx\n", res->addr);
return false;
}
res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]];
res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
for (i = 0; i < adxl_component_count; i++) {
if (adxl_values[i] == ~0x0ull)
continue;
len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx",
adxl_component_names[i], adxl_values[i]);
if (MSG_SIZE - len <= 0)
break;
}
return true;
}
static void skx_mce_output_error(struct mem_ctl_info *mci,
const struct mce *m,
struct decoded_addr *res)
{
enum hw_event_mc_err_type tp_event;
char *type, *optype;
bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
bool overflow = GET_BITFIELD(m->status, 62, 62);
bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
bool recoverable;
u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
u32 mscod = GET_BITFIELD(m->status, 16, 31);
u32 errcode = GET_BITFIELD(m->status, 0, 15);
u32 optypenum = GET_BITFIELD(m->status, 4, 6);
recoverable = GET_BITFIELD(m->status, 56, 56);
if (uncorrected_error) {
core_err_cnt = 1;
if (ripv) {
type = "FATAL";
tp_event = HW_EVENT_ERR_FATAL;
} else {
type = "NON_FATAL";
tp_event = HW_EVENT_ERR_UNCORRECTED;
}
} else {
type = "CORRECTED";
tp_event = HW_EVENT_ERR_CORRECTED;
}
/*
* According with Table 15-9 of the Intel Architecture spec vol 3A,
* memory errors should fit in this mask:
* 000f 0000 1mmm cccc (binary)
* where:
* f = Correction Report Filtering Bit. If 1, subsequent errors
* won't be shown
* mmm = error type
* cccc = channel
* If the mask doesn't match, report an error to the parsing logic
*/
if (!((errcode & 0xef80) == 0x80)) {
optype = "Can't parse: it is not a mem";
} else {
switch (optypenum) {
case 0:
optype = "generic undef request error";
break;
case 1:
optype = "memory read error";
break;
case 2:
optype = "memory write error";
break;
case 3:
optype = "addr/cmd error";
break;
case 4:
optype = "memory scrubbing error";
break;
default:
optype = "reserved";
break;
}
}
if (adxl_component_count) {
snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "",
mscod, errcode, adxl_msg);
} else {
snprintf(skx_msg, MSG_SIZE,
"%s%s err_code:0x%04x:0x%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:0x%x col:0x%x",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "",
mscod, errcode,
res->socket, res->imc, res->rank,
res->bank_group, res->bank_address, res->row, res->column);
}
edac_dbg(0, "%s\n", skx_msg);
/* Call the helper to output message */
edac_mc_handle_error(tp_event, mci, core_err_cnt,
m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
res->channel, res->dimm, -1,
optype, skx_msg);
}
static struct mem_ctl_info *get_mci(int src_id, int lmc)
{
struct skx_dev *d;
if (lmc > NUM_IMC - 1) {
skx_printk(KERN_ERR, "Bad lmc %d\n", lmc);
return NULL;
}
list_for_each_entry(d, &skx_edac_list, list) {
if (d->imc[0].src_id == src_id)
return d->imc[lmc].mci;
}
skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc);
return NULL;
}
static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
void *data)
{
struct mce *mce = (struct mce *)data;
struct decoded_addr res;
struct mem_ctl_info *mci;
char *type;
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
return NOTIFY_DONE;
/* ignore unless this is memory related with an address */
if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
return NOTIFY_DONE;
memset(&res, 0, sizeof(res));
res.addr = mce->addr;
if (adxl_component_count) {
if (!skx_adxl_decode(&res))
return NOTIFY_DONE;
mci = get_mci(res.socket, res.imc);
} else {
if (!skx_decode(&res))
return NOTIFY_DONE;
mci = res.dev->imc[res.imc].mci;
}
if (!mci)
return NOTIFY_DONE;
if (mce->mcgstatus & MCG_STATUS_MCIP)
type = "Exception";
else
type = "Event";
skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n");
skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: 0x%llx "
"Bank %d: %016Lx\n", mce->extcpu, type,
mce->mcgstatus, mce->bank, mce->status);
skx_mc_printk(mci, KERN_DEBUG, "TSC 0x%llx ", mce->tsc);
skx_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", mce->addr);
skx_mc_printk(mci, KERN_DEBUG, "MISC 0x%llx ", mce->misc);
skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:0x%x TIME %llu SOCKET "
"%u APIC 0x%x\n", mce->cpuvendor, mce->cpuid,
mce->time, mce->socketid, mce->apicid);
skx_mce_output_error(mci, mce, &res);
return NOTIFY_DONE;
}
static struct notifier_block skx_mce_dec = { static struct notifier_block skx_mce_dec = {
.notifier_call = skx_mce_check_error, .notifier_call = skx_mce_check_error,
.priority = MCE_PRIO_EDAC, .priority = MCE_PRIO_EDAC,
}; };
#ifdef CONFIG_EDAC_DEBUG
/*
* Debug feature.
* Exercise the address decode logic by writing an address to
* /sys/kernel/debug/edac/skx_test/addr.
*/
static struct dentry *skx_test;
static int debugfs_u64_set(void *data, u64 val)
{
struct mce m;
pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
memset(&m, 0, sizeof(m));
/* ADDRV + MemRd + Unknown channel */
m.status = MCI_STATUS_ADDRV + 0x90;
/* One corrected error */
m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT);
m.addr = val;
skx_mce_check_error(NULL, 0, &m);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
static void setup_skx_debug(void)
{
skx_test = edac_debugfs_create_dir("skx_test");
if (!skx_test)
return;
if (!edac_debugfs_create_file("addr", 0200, skx_test,
NULL, &fops_u64_wo)) {
debugfs_remove(skx_test);
skx_test = NULL;
}
}
static void teardown_skx_debug(void)
{
debugfs_remove_recursive(skx_test);
}
#else
static void setup_skx_debug(void) {}
static void teardown_skx_debug(void) {}
#endif /*CONFIG_EDAC_DEBUG*/
static void skx_remove(void)
{
int i, j;
struct skx_dev *d, *tmp;
edac_dbg(0, "\n");
list_for_each_entry_safe(d, tmp, &skx_edac_list, list) {
list_del(&d->list);
for (i = 0; i < NUM_IMC; i++) {
skx_unregister_mci(&d->imc[i]);
for (j = 0; j < NUM_CHANNELS; j++)
pci_dev_put(d->imc[i].chan[j].cdev);
}
pci_dev_put(d->util_all);
pci_dev_put(d->sad_all);
kfree(d);
}
}
static void __init skx_adxl_get(void)
{
const char * const *names;
int i, j;
names = adxl_get_component_names();
if (!names) {
skx_printk(KERN_NOTICE, "No firmware support for address translation.");
skx_printk(KERN_CONT, " Only decoding DDR4 address!\n");
return;
}
for (i = 0; i < INDEX_MAX; i++) {
for (j = 0; names[j]; j++) {
if (!strcmp(component_names[i], names[j])) {
component_indices[i] = j;
break;
}
}
if (!names[j])
goto err;
}
adxl_component_names = names;
while (*names++)
adxl_component_count++;
adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values),
GFP_KERNEL);
if (!adxl_values) {
adxl_component_count = 0;
return;
}
adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
if (!adxl_msg) {
adxl_component_count = 0;
kfree(adxl_values);
}
return;
err:
skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ",
component_names[i]);
for (j = 0; names[j]; j++)
skx_printk(KERN_CONT, "%s ", names[j]);
skx_printk(KERN_CONT, "\n");
}
static void __exit skx_adxl_put(void)
{
kfree(adxl_values);
kfree(adxl_msg);
}
/* /*
* skx_init: * skx_init:
* make sure we are running on the correct cpu model * make sure we are running on the correct cpu model
...@@ -1261,7 +551,7 @@ static int __init skx_init(void) ...@@ -1261,7 +551,7 @@ static int __init skx_init(void)
const struct x86_cpu_id *id; const struct x86_cpu_id *id;
const struct munit *m; const struct munit *m;
const char *owner; const char *owner;
int rc = 0, i; int rc = 0, i, off[3] = {0xd0, 0xd4, 0xd8};
u8 mc = 0, src_id, node_id; u8 mc = 0, src_id, node_id;
struct skx_dev *d; struct skx_dev *d;
...@@ -1275,17 +565,18 @@ static int __init skx_init(void) ...@@ -1275,17 +565,18 @@ static int __init skx_init(void)
if (!id) if (!id)
return -ENODEV; return -ENODEV;
rc = skx_get_hi_lo(); rc = skx_get_hi_lo(0x2034, off, &skx_tolm, &skx_tohm);
if (rc) if (rc)
return rc; return rc;
rc = get_all_bus_mappings(); rc = skx_get_all_bus_mappings(0x2016, 0xcc, SKX, &skx_edac_list);
if (rc < 0) if (rc < 0)
goto fail; goto fail;
if (rc == 0) { if (rc == 0) {
edac_dbg(2, "No memory controllers found\n"); edac_dbg(2, "No memory controllers found\n");
return -ENODEV; return -ENODEV;
} }
skx_num_sockets = rc;
for (m = skx_all_munits; m->did; m++) { for (m = skx_all_munits; m->did; m++) {
rc = get_all_munits(m); rc = get_all_munits(m);
...@@ -1299,34 +590,36 @@ static int __init skx_init(void) ...@@ -1299,34 +590,36 @@ static int __init skx_init(void)
} }
} }
list_for_each_entry(d, &skx_edac_list, list) { list_for_each_entry(d, skx_edac_list, list) {
src_id = get_src_id(d); rc = skx_get_src_id(d, &src_id);
node_id = skx_get_node_id(d); if (rc < 0)
goto fail;
rc = skx_get_node_id(d, &node_id);
if (rc < 0)
goto fail;
edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id); edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id);
for (i = 0; i < NUM_IMC; i++) { for (i = 0; i < SKX_NUM_IMC; i++) {
d->imc[i].mc = mc++; d->imc[i].mc = mc++;
d->imc[i].lmc = i; d->imc[i].lmc = i;
d->imc[i].src_id = src_id; d->imc[i].src_id = src_id;
d->imc[i].node_id = node_id; d->imc[i].node_id = node_id;
rc = skx_register_mci(&d->imc[i]); rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev,
"Skylake Socket", EDAC_MOD_STR,
skx_get_dimm_config);
if (rc < 0) if (rc < 0)
goto fail; goto fail;
} }
} }
skx_msg = kzalloc(MSG_SIZE, GFP_KERNEL); skx_set_decode(skx_decode);
if (!skx_msg) {
rc = -ENOMEM;
goto fail;
}
if (nvdimm_count) if (nvdimm_count && skx_adxl_get() == -ENODEV)
skx_adxl_get(); skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n");
/* Ensure that the OPSTATE is set correctly for POLL or NMI */ /* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init(); opstate_init();
setup_skx_debug(); setup_skx_debug("skx_test");
mce_register_decode_chain(&skx_mce_dec); mce_register_decode_chain(&skx_mce_dec);
...@@ -1343,7 +636,6 @@ static void __exit skx_exit(void) ...@@ -1343,7 +636,6 @@ static void __exit skx_exit(void)
teardown_skx_debug(); teardown_skx_debug();
if (nvdimm_count) if (nvdimm_count)
skx_adxl_put(); skx_adxl_put();
kfree(skx_msg);
skx_remove(); skx_remove();
} }
......
// SPDX-License-Identifier: GPL-2.0
/*
* Common codes for both the skx_edac driver and Intel 10nm server EDAC driver.
* Originally split out from the skx_edac driver.
*
* Copyright (c) 2018, Intel Corporation.
*/
#include <linux/acpi.h>
#include <linux/dmi.h>
#include <linux/adxl.h>
#include <acpi/nfit.h>
#include <asm/mce.h>
#include "edac_module.h"
#include "skx_common.h"
static const char * const component_names[] = {
[INDEX_SOCKET] = "ProcessorSocketId",
[INDEX_MEMCTRL] = "MemoryControllerId",
[INDEX_CHANNEL] = "ChannelId",
[INDEX_DIMM] = "DimmSlotId",
};
static int component_indices[ARRAY_SIZE(component_names)];
static int adxl_component_count;
static const char * const *adxl_component_names;
static u64 *adxl_values;
static char *adxl_msg;
static char skx_msg[MSG_SIZE];
static skx_decode_f skx_decode;
static u64 skx_tolm, skx_tohm;
static LIST_HEAD(dev_edac_list);
int __init skx_adxl_get(void)
{
const char * const *names;
int i, j;
names = adxl_get_component_names();
if (!names) {
skx_printk(KERN_NOTICE, "No firmware support for address translation.\n");
return -ENODEV;
}
for (i = 0; i < INDEX_MAX; i++) {
for (j = 0; names[j]; j++) {
if (!strcmp(component_names[i], names[j])) {
component_indices[i] = j;
break;
}
}
if (!names[j])
goto err;
}
adxl_component_names = names;
while (*names++)
adxl_component_count++;
adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values),
GFP_KERNEL);
if (!adxl_values) {
adxl_component_count = 0;
return -ENOMEM;
}
adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
if (!adxl_msg) {
adxl_component_count = 0;
kfree(adxl_values);
return -ENOMEM;
}
return 0;
err:
skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ",
component_names[i]);
for (j = 0; names[j]; j++)
skx_printk(KERN_CONT, "%s ", names[j]);
skx_printk(KERN_CONT, "\n");
return -ENODEV;
}
void __exit skx_adxl_put(void)
{
kfree(adxl_values);
kfree(adxl_msg);
}
static bool skx_adxl_decode(struct decoded_addr *res)
{
int i, len = 0;
if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
res->addr < BIT_ULL(32))) {
edac_dbg(0, "Address 0x%llx out of range\n", res->addr);
return false;
}
if (adxl_decode(res->addr, adxl_values)) {
edac_dbg(0, "Failed to decode 0x%llx\n", res->addr);
return false;
}
res->socket = (int)adxl_values[component_indices[INDEX_SOCKET]];
res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
for (i = 0; i < adxl_component_count; i++) {
if (adxl_values[i] == ~0x0ull)
continue;
len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx",
adxl_component_names[i], adxl_values[i]);
if (MSG_SIZE - len <= 0)
break;
}
return true;
}
void skx_set_decode(skx_decode_f decode)
{
skx_decode = decode;
}
int skx_get_src_id(struct skx_dev *d, u8 *id)
{
u32 reg;
if (pci_read_config_dword(d->util_all, 0xf0, &reg)) {
skx_printk(KERN_ERR, "Failed to read src id\n");
return -ENODEV;
}
*id = GET_BITFIELD(reg, 12, 14);
return 0;
}
int skx_get_node_id(struct skx_dev *d, u8 *id)
{
u32 reg;
if (pci_read_config_dword(d->util_all, 0xf4, &reg)) {
skx_printk(KERN_ERR, "Failed to read node id\n");
return -ENODEV;
}
*id = GET_BITFIELD(reg, 0, 2);
return 0;
}
static int get_width(u32 mtr)
{
switch (GET_BITFIELD(mtr, 8, 9)) {
case 0:
return DEV_X4;
case 1:
return DEV_X8;
case 2:
return DEV_X16;
}
return DEV_UNKNOWN;
}
/*
* We use the per-socket device @did to count how many sockets are present,
* and to detemine which PCI buses are associated with each socket. Allocate
* and build the full list of all the skx_dev structures that we need here.
*/
int skx_get_all_bus_mappings(unsigned int did, int off, enum type type,
struct list_head **list)
{
struct pci_dev *pdev, *prev;
struct skx_dev *d;
u32 reg;
int ndev = 0;
prev = NULL;
for (;;) {
pdev = pci_get_device(PCI_VENDOR_ID_INTEL, did, prev);
if (!pdev)
break;
ndev++;
d = kzalloc(sizeof(*d), GFP_KERNEL);
if (!d) {
pci_dev_put(pdev);
return -ENOMEM;
}
if (pci_read_config_dword(pdev, off, &reg)) {
kfree(d);
pci_dev_put(pdev);
skx_printk(KERN_ERR, "Failed to read bus idx\n");
return -ENODEV;
}
d->bus[0] = GET_BITFIELD(reg, 0, 7);
d->bus[1] = GET_BITFIELD(reg, 8, 15);
if (type == SKX) {
d->seg = pci_domain_nr(pdev->bus);
d->bus[2] = GET_BITFIELD(reg, 16, 23);
d->bus[3] = GET_BITFIELD(reg, 24, 31);
} else {
d->seg = GET_BITFIELD(reg, 16, 23);
}
edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x\n",
d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
list_add_tail(&d->list, &dev_edac_list);
prev = pdev;
}
if (list)
*list = &dev_edac_list;
return ndev;
}
int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm)
{
struct pci_dev *pdev;
u32 reg;
pdev = pci_get_device(PCI_VENDOR_ID_INTEL, did, NULL);
if (!pdev) {
skx_printk(KERN_ERR, "Can't get tolm/tohm\n");
return -ENODEV;
}
if (pci_read_config_dword(pdev, off[0], &reg)) {
skx_printk(KERN_ERR, "Failed to read tolm\n");
goto fail;
}
skx_tolm = reg;
if (pci_read_config_dword(pdev, off[1], &reg)) {
skx_printk(KERN_ERR, "Failed to read lower tohm\n");
goto fail;
}
skx_tohm = reg;
if (pci_read_config_dword(pdev, off[2], &reg)) {
skx_printk(KERN_ERR, "Failed to read upper tohm\n");
goto fail;
}
skx_tohm |= (u64)reg << 32;
pci_dev_put(pdev);
*tolm = skx_tolm;
*tohm = skx_tohm;
edac_dbg(2, "tolm = 0x%llx tohm = 0x%llx\n", skx_tolm, skx_tohm);
return 0;
fail:
pci_dev_put(pdev);
return -ENODEV;
}
static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add,
int minval, int maxval, const char *name)
{
u32 val = GET_BITFIELD(reg, lobit, hibit);
if (val < minval || val > maxval) {
edac_dbg(2, "bad %s = %d (raw=0x%x)\n", name, val, reg);
return -EINVAL;
}
return val + add;
}
#define numrank(reg) skx_get_dimm_attr(reg, 12, 13, 0, 0, 2, "ranks")
#define numrow(reg) skx_get_dimm_attr(reg, 2, 4, 12, 1, 6, "rows")
#define numcol(reg) skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols")
int skx_get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
struct skx_imc *imc, int chan, int dimmno)
{
int banks = 16, ranks, rows, cols, npages;
u64 size;
ranks = numrank(mtr);
rows = numrow(mtr);
cols = numcol(mtr);
/*
* Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
*/
size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3);
npages = MiB_TO_PAGES(size);
edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: 0x%x, col: 0x%x\n",
imc->mc, chan, dimmno, size, npages,
banks, 1 << ranks, rows, cols);
imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mtr, 0, 0);
imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mtr, 9, 9);
imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0);
imc->chan[chan].dimms[dimmno].rowbits = rows;
imc->chan[chan].dimms[dimmno].colbits = cols;
dimm->nr_pages = npages;
dimm->grain = 32;
dimm->dtype = get_width(mtr);
dimm->mtype = MEM_DDR4;
dimm->edac_mode = EDAC_SECDED; /* likely better than this */
snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
imc->src_id, imc->lmc, chan, dimmno);
return 1;
}
int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
int chan, int dimmno, const char *mod_str)
{
int smbios_handle;
u32 dev_handle;
u16 flags;
u64 size = 0;
dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
imc->src_id, 0);
smbios_handle = nfit_get_smbios_id(dev_handle, &flags);
if (smbios_handle == -EOPNOTSUPP) {
pr_warn_once("%s: Can't find size of NVDIMM. Try enabling CONFIG_ACPI_NFIT\n", mod_str);
goto unknown_size;
}
if (smbios_handle < 0) {
skx_printk(KERN_ERR, "Can't find handle for NVDIMM ADR=0x%x\n", dev_handle);
goto unknown_size;
}
if (flags & ACPI_NFIT_MEM_MAP_FAILED) {
skx_printk(KERN_ERR, "NVDIMM ADR=0x%x is not mapped\n", dev_handle);
goto unknown_size;
}
size = dmi_memdev_size(smbios_handle);
if (size == ~0ull)
skx_printk(KERN_ERR, "Can't find size for NVDIMM ADR=0x%x/SMBIOS=0x%x\n",
dev_handle, smbios_handle);
unknown_size:
dimm->nr_pages = size >> PAGE_SHIFT;
dimm->grain = 32;
dimm->dtype = DEV_UNKNOWN;
dimm->mtype = MEM_NVDIMM;
dimm->edac_mode = EDAC_SECDED; /* likely better than this */
edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n",
imc->mc, chan, dimmno, size >> 20, dimm->nr_pages);
snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
imc->src_id, imc->lmc, chan, dimmno);
return (size == 0 || size == ~0ull) ? 0 : 1;
}
int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
const char *ctl_name, const char *mod_str,
get_dimm_config_f get_dimm_config)
{
struct mem_ctl_info *mci;
struct edac_mc_layer layers[2];
struct skx_pvt *pvt;
int rc;
/* Allocate a new MC control structure */
layers[0].type = EDAC_MC_LAYER_CHANNEL;
layers[0].size = NUM_CHANNELS;
layers[0].is_virt_csrow = false;
layers[1].type = EDAC_MC_LAYER_SLOT;
layers[1].size = NUM_DIMMS;
layers[1].is_virt_csrow = true;
mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
sizeof(struct skx_pvt));
if (unlikely(!mci))
return -ENOMEM;
edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci);
/* Associate skx_dev and mci for future usage */
imc->mci = mci;
pvt = mci->pvt_info;
pvt->imc = imc;
mci->ctl_name = kasprintf(GFP_KERNEL, "%s#%d IMC#%d", ctl_name,
imc->node_id, imc->lmc);
if (!mci->ctl_name) {
rc = -ENOMEM;
goto fail0;
}
mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_NVDIMM;
mci->edac_ctl_cap = EDAC_FLAG_NONE;
mci->edac_cap = EDAC_FLAG_NONE;
mci->mod_name = mod_str;
mci->dev_name = pci_name(pdev);
mci->ctl_page_to_phys = NULL;
rc = get_dimm_config(mci);
if (rc < 0)
goto fail;
/* Record ptr to the generic device */
mci->pdev = &pdev->dev;
/* Add this new MC control structure to EDAC's list of MCs */
if (unlikely(edac_mc_add_mc(mci))) {
edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
rc = -EINVAL;
goto fail;
}
return 0;
fail:
kfree(mci->ctl_name);
fail0:
edac_mc_free(mci);
imc->mci = NULL;
return rc;
}
static void skx_unregister_mci(struct skx_imc *imc)
{
struct mem_ctl_info *mci = imc->mci;
if (!mci)
return;
edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci);
/* Remove MC sysfs nodes */
edac_mc_del_mc(mci->pdev);
edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
kfree(mci->ctl_name);
edac_mc_free(mci);
}
static struct mem_ctl_info *get_mci(int src_id, int lmc)
{
struct skx_dev *d;
if (lmc > NUM_IMC - 1) {
skx_printk(KERN_ERR, "Bad lmc %d\n", lmc);
return NULL;
}
list_for_each_entry(d, &dev_edac_list, list) {
if (d->imc[0].src_id == src_id)
return d->imc[lmc].mci;
}
skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc);
return NULL;
}
static void skx_mce_output_error(struct mem_ctl_info *mci,
const struct mce *m,
struct decoded_addr *res)
{
enum hw_event_mc_err_type tp_event;
char *type, *optype;
bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
bool overflow = GET_BITFIELD(m->status, 62, 62);
bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
bool recoverable;
u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
u32 mscod = GET_BITFIELD(m->status, 16, 31);
u32 errcode = GET_BITFIELD(m->status, 0, 15);
u32 optypenum = GET_BITFIELD(m->status, 4, 6);
recoverable = GET_BITFIELD(m->status, 56, 56);
if (uncorrected_error) {
core_err_cnt = 1;
if (ripv) {
type = "FATAL";
tp_event = HW_EVENT_ERR_FATAL;
} else {
type = "NON_FATAL";
tp_event = HW_EVENT_ERR_UNCORRECTED;
}
} else {
type = "CORRECTED";
tp_event = HW_EVENT_ERR_CORRECTED;
}
/*
* According to Intel Architecture spec vol 3B,
* Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
* memory errors should fit one of these masks:
* 000f 0000 1mmm cccc (binary)
* 000f 0010 1mmm cccc (binary) [RAM used as cache]
* where:
* f = Correction Report Filtering Bit. If 1, subsequent errors
* won't be shown
* mmm = error type
* cccc = channel
* If the mask doesn't match, report an error to the parsing logic
*/
if (!((errcode & 0xef80) == 0x80 || (errcode & 0xef80) == 0x280)) {
optype = "Can't parse: it is not a mem";
} else {
switch (optypenum) {
case 0:
optype = "generic undef request error";
break;
case 1:
optype = "memory read error";
break;
case 2:
optype = "memory write error";
break;
case 3:
optype = "addr/cmd error";
break;
case 4:
optype = "memory scrubbing error";
break;
default:
optype = "reserved";
break;
}
}
if (adxl_component_count) {
snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "",
mscod, errcode, adxl_msg);
} else {
snprintf(skx_msg, MSG_SIZE,
"%s%s err_code:0x%04x:0x%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:0x%x col:0x%x",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "",
mscod, errcode,
res->socket, res->imc, res->rank,
res->bank_group, res->bank_address, res->row, res->column);
}
edac_dbg(0, "%s\n", skx_msg);
/* Call the helper to output message */
edac_mc_handle_error(tp_event, mci, core_err_cnt,
m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
res->channel, res->dimm, -1,
optype, skx_msg);
}
int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
void *data)
{
struct mce *mce = (struct mce *)data;
struct decoded_addr res;
struct mem_ctl_info *mci;
char *type;
if (edac_get_report_status() == EDAC_REPORTING_DISABLED)
return NOTIFY_DONE;
/* ignore unless this is memory related with an address */
if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
return NOTIFY_DONE;
memset(&res, 0, sizeof(res));
res.addr = mce->addr;
if (adxl_component_count) {
if (!skx_adxl_decode(&res))
return NOTIFY_DONE;
mci = get_mci(res.socket, res.imc);
} else {
if (!skx_decode || !skx_decode(&res))
return NOTIFY_DONE;
mci = res.dev->imc[res.imc].mci;
}
if (!mci)
return NOTIFY_DONE;
if (mce->mcgstatus & MCG_STATUS_MCIP)
type = "Exception";
else
type = "Event";
skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n");
skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: 0x%llx "
"Bank %d: 0x%llx\n", mce->extcpu, type,
mce->mcgstatus, mce->bank, mce->status);
skx_mc_printk(mci, KERN_DEBUG, "TSC 0x%llx ", mce->tsc);
skx_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", mce->addr);
skx_mc_printk(mci, KERN_DEBUG, "MISC 0x%llx ", mce->misc);
skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:0x%x TIME %llu SOCKET "
"%u APIC 0x%x\n", mce->cpuvendor, mce->cpuid,
mce->time, mce->socketid, mce->apicid);
skx_mce_output_error(mci, mce, &res);
return NOTIFY_DONE;
}
void skx_remove(void)
{
int i, j;
struct skx_dev *d, *tmp;
edac_dbg(0, "\n");
list_for_each_entry_safe(d, tmp, &dev_edac_list, list) {
list_del(&d->list);
for (i = 0; i < NUM_IMC; i++) {
if (d->imc[i].mci)
skx_unregister_mci(&d->imc[i]);
if (d->imc[i].mdev)
pci_dev_put(d->imc[i].mdev);
if (d->imc[i].mbase)
iounmap(d->imc[i].mbase);
for (j = 0; j < NUM_CHANNELS; j++) {
if (d->imc[i].chan[j].cdev)
pci_dev_put(d->imc[i].chan[j].cdev);
}
}
if (d->util_all)
pci_dev_put(d->util_all);
if (d->sad_all)
pci_dev_put(d->sad_all);
if (d->uracu)
pci_dev_put(d->uracu);
kfree(d);
}
}
#ifdef CONFIG_EDAC_DEBUG
/*
* Debug feature.
* Exercise the address decode logic by writing an address to
* /sys/kernel/debug/edac/dirname/addr.
*/
static struct dentry *skx_test;
static int debugfs_u64_set(void *data, u64 val)
{
struct mce m;
pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
memset(&m, 0, sizeof(m));
/* ADDRV + MemRd + Unknown channel */
m.status = MCI_STATUS_ADDRV + 0x90;
/* One corrected error */
m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT);
m.addr = val;
skx_mce_check_error(NULL, 0, &m);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
void setup_skx_debug(const char *dirname)
{
skx_test = edac_debugfs_create_dir(dirname);
if (!skx_test)
return;
if (!edac_debugfs_create_file("addr", 0200, skx_test,
NULL, &fops_u64_wo)) {
debugfs_remove(skx_test);
skx_test = NULL;
}
}
void teardown_skx_debug(void)
{
debugfs_remove_recursive(skx_test);
}
#endif /*CONFIG_EDAC_DEBUG*/
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Common codes for both the skx_edac driver and Intel 10nm server EDAC driver.
* Originally split out from the skx_edac driver.
*
* Copyright (c) 2018, Intel Corporation.
*/
#ifndef _SKX_COMM_EDAC_H
#define _SKX_COMM_EDAC_H
#define MSG_SIZE 1024
/*
* Debug macros
*/
#define skx_printk(level, fmt, arg...) \
edac_printk(level, "skx", fmt, ##arg)
#define skx_mc_printk(mci, level, fmt, arg...) \
edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)
/*
* Get a bit field at register value <v>, from bit <lo> to bit <hi>
*/
#define GET_BITFIELD(v, lo, hi) \
(((v) & GENMASK_ULL((hi), (lo))) >> (lo))
#define SKX_NUM_IMC 2 /* Memory controllers per socket */
#define SKX_NUM_CHANNELS 3 /* Channels per memory controller */
#define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */
#define I10NM_NUM_IMC 4
#define I10NM_NUM_CHANNELS 2
#define I10NM_NUM_DIMMS 2
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
#define NUM_CHANNELS MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS)
#define NUM_DIMMS MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS)
#define IS_DIMM_PRESENT(r) GET_BITFIELD(r, 15, 15)
#define IS_NVDIMM_PRESENT(r, i) GET_BITFIELD(r, i, i)
/*
* Each cpu socket contains some pci devices that provide global
* information, and also some that are local to each of the two
* memory controllers on the die.
*/
struct skx_dev {
struct list_head list;
u8 bus[4];
int seg;
struct pci_dev *sad_all;
struct pci_dev *util_all;
struct pci_dev *uracu; /* for i10nm CPU */
u32 mcroute;
struct skx_imc {
struct mem_ctl_info *mci;
struct pci_dev *mdev; /* for i10nm CPU */
void __iomem *mbase; /* for i10nm CPU */
u8 mc; /* system wide mc# */
u8 lmc; /* socket relative mc# */
u8 src_id, node_id;
struct skx_channel {
struct pci_dev *cdev;
struct skx_dimm {
u8 close_pg;
u8 bank_xor_enable;
u8 fine_grain_bank;
u8 rowbits;
u8 colbits;
} dimms[NUM_DIMMS];
} chan[NUM_CHANNELS];
} imc[NUM_IMC];
};
struct skx_pvt {
struct skx_imc *imc;
};
enum type {
SKX,
I10NM
};
enum {
INDEX_SOCKET,
INDEX_MEMCTRL,
INDEX_CHANNEL,
INDEX_DIMM,
INDEX_MAX
};
struct decoded_addr {
struct skx_dev *dev;
u64 addr;
int socket;
int imc;
int channel;
u64 chan_addr;
int sktways;
int chanways;
int dimm;
int rank;
int channel_rank;
u64 rank_address;
int row;
int column;
int bank_address;
int bank_group;
};
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci);
typedef bool (*skx_decode_f)(struct decoded_addr *res);
int __init skx_adxl_get(void);
void __exit skx_adxl_put(void);
void skx_set_decode(skx_decode_f decode);
int skx_get_src_id(struct skx_dev *d, u8 *id);
int skx_get_node_id(struct skx_dev *d, u8 *id);
int skx_get_all_bus_mappings(unsigned int did, int off, enum type,
struct list_head **list);
int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm);
int skx_get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm,
struct skx_imc *imc, int chan, int dimmno);
int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
int chan, int dimmno, const char *mod_str);
int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
const char *ctl_name, const char *mod_str,
get_dimm_config_f get_dimm_config);
int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
void *data);
void skx_remove(void);
#ifdef CONFIG_EDAC_DEBUG
void setup_skx_debug(const char *dirname);
void teardown_skx_debug(void);
#else
static inline void setup_skx_debug(const char *dirname) {}
static inline void teardown_skx_debug(void) {}
#endif /*CONFIG_EDAC_DEBUG*/
#endif /* _SKX_COMM_EDAC_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment