Commit 85184966 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'edac_updates_for_v5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras

Pull EDAC updates from Borislav Petkov:

 - a couple of fixes/improvements to amd64_edac:
    * merge debugging and error injection functionality into the main driver
    * tone down info/error output
    * do not attempt to load it on F15h client hw

 - misc fixes to other drivers

* tag 'edac_updates_for_v5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  EDAC/amd64: Issue probing messages only on properly detected hardware
  EDAC/xgene: Do not print a failure message to get an IRQ twice
  EDAC/ppc4xx: Convert comma to semicolon
  EDAC/amd64: Limit error injection functionality to supported hw
  EDAC/amd64: Merge error injection sysfs facilities
  EDAC/amd64: Merge sysfs debugging attributes setup code
  EDAC/amd64: Tone down messages about missing PCI IDs
  EDAC/amd64: Do not load on family 0x15, model 0x13
parents e767b353 6118b488
...@@ -81,14 +81,13 @@ config EDAC_AMD64 ...@@ -81,14 +81,13 @@ config EDAC_AMD64
Support for error detection and correction of DRAM ECC errors on Support for error detection and correction of DRAM ECC errors on
the AMD64 families (>= K8) of memory controllers. the AMD64 families (>= K8) of memory controllers.
config EDAC_AMD64_ERROR_INJECTION When EDAC_DEBUG is enabled, hardware error injection facilities
bool "Sysfs HW Error injection facilities" through sysfs are available:
depends on EDAC_AMD64
help AMD CPUs up to and excluding family 0x17 provide for Memory
Recent Opterons (Family 10h and later) provide for Memory Error Error Injection into the ECC detection circuits. The amd64_edac
Injection into the ECC detection circuits. The amd64_edac module module allows the operator/user to inject Uncorrectable and
allows the operator/user to inject Uncorrectable and Correctable Correctable errors into DRAM.
errors into DRAM.
When enabled, in each of the respective memory controller directories When enabled, in each of the respective memory controller directories
(/sys/devices/system/edac/mc/mcX), there are 3 input files: (/sys/devices/system/edac/mc/mcX), there are 3 input files:
......
...@@ -44,12 +44,7 @@ obj-$(CONFIG_EDAC_IE31200) += ie31200_edac.o ...@@ -44,12 +44,7 @@ obj-$(CONFIG_EDAC_IE31200) += ie31200_edac.o
obj-$(CONFIG_EDAC_X38) += x38_edac.o obj-$(CONFIG_EDAC_X38) += x38_edac.o
obj-$(CONFIG_EDAC_I82860) += i82860_edac.o obj-$(CONFIG_EDAC_I82860) += i82860_edac.o
obj-$(CONFIG_EDAC_R82600) += r82600_edac.o obj-$(CONFIG_EDAC_R82600) += r82600_edac.o
obj-$(CONFIG_EDAC_AMD64) += amd64_edac.o
amd64_edac_mod-y := amd64_edac.o
amd64_edac_mod-$(CONFIG_EDAC_DEBUG) += amd64_edac_dbg.o
amd64_edac_mod-$(CONFIG_EDAC_AMD64_ERROR_INJECTION) += amd64_edac_inj.o
obj-$(CONFIG_EDAC_AMD64) += amd64_edac_mod.o
obj-$(CONFIG_EDAC_PASEMI) += pasemi_edac.o obj-$(CONFIG_EDAC_PASEMI) += pasemi_edac.o
......
This diff is collapsed.
...@@ -462,14 +462,6 @@ struct ecc_settings { ...@@ -462,14 +462,6 @@ struct ecc_settings {
} flags; } flags;
}; };
#ifdef CONFIG_EDAC_DEBUG
extern const struct attribute_group amd64_edac_dbg_group;
#endif
#ifdef CONFIG_EDAC_AMD64_ERROR_INJECTION
extern const struct attribute_group amd64_edac_inj_group;
#endif
/* /*
* Each of the PCI Device IDs types have their own set of hardware accessor * Each of the PCI Device IDs types have their own set of hardware accessor
* functions and per device encoding/decoding logic. * functions and per device encoding/decoding logic.
...@@ -501,9 +493,6 @@ int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, ...@@ -501,9 +493,6 @@ int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset,
#define amd64_write_pci_cfg(pdev, offset, val) \ #define amd64_write_pci_cfg(pdev, offset, val) \
__amd64_write_pci_cfg_dword(pdev, offset, val, __func__) __amd64_write_pci_cfg_dword(pdev, offset, val, __func__)
int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
u64 *hole_offset, u64 *hole_size);
#define to_mci(k) container_of(k, struct mem_ctl_info, dev) #define to_mci(k) container_of(k, struct mem_ctl_info, dev)
/* Injection helpers */ /* Injection helpers */
......
// SPDX-License-Identifier: GPL-2.0
#include "amd64_edac.h"
#define EDAC_DCT_ATTR_SHOW(reg) \
static ssize_t amd64_##reg##_show(struct device *dev, \
struct device_attribute *mattr, \
char *data) \
{ \
struct mem_ctl_info *mci = to_mci(dev); \
struct amd64_pvt *pvt = mci->pvt_info; \
return sprintf(data, "0x%016llx\n", (u64)pvt->reg); \
}
EDAC_DCT_ATTR_SHOW(dhar);
EDAC_DCT_ATTR_SHOW(dbam0);
EDAC_DCT_ATTR_SHOW(top_mem);
EDAC_DCT_ATTR_SHOW(top_mem2);
static ssize_t amd64_hole_show(struct device *dev,
struct device_attribute *mattr,
char *data)
{
struct mem_ctl_info *mci = to_mci(dev);
u64 hole_base = 0;
u64 hole_offset = 0;
u64 hole_size = 0;
amd64_get_dram_hole_info(mci, &hole_base, &hole_offset, &hole_size);
return sprintf(data, "%llx %llx %llx\n", hole_base, hole_offset,
hole_size);
}
/*
* update NUM_DBG_ATTRS in case you add new members
*/
static DEVICE_ATTR(dhar, S_IRUGO, amd64_dhar_show, NULL);
static DEVICE_ATTR(dbam, S_IRUGO, amd64_dbam0_show, NULL);
static DEVICE_ATTR(topmem, S_IRUGO, amd64_top_mem_show, NULL);
static DEVICE_ATTR(topmem2, S_IRUGO, amd64_top_mem2_show, NULL);
static DEVICE_ATTR(dram_hole, S_IRUGO, amd64_hole_show, NULL);
static struct attribute *amd64_edac_dbg_attrs[] = {
&dev_attr_dhar.attr,
&dev_attr_dbam.attr,
&dev_attr_topmem.attr,
&dev_attr_topmem2.attr,
&dev_attr_dram_hole.attr,
NULL
};
const struct attribute_group amd64_edac_dbg_group = {
.attrs = amd64_edac_dbg_attrs,
};
// SPDX-License-Identifier: GPL-2.0
#include "amd64_edac.h"
static ssize_t amd64_inject_section_show(struct device *dev,
struct device_attribute *mattr,
char *buf)
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
return sprintf(buf, "0x%x\n", pvt->injection.section);
}
/*
* store error injection section value which refers to one of 4 16-byte sections
* within a 64-byte cacheline
*
* range: 0..3
*/
static ssize_t amd64_inject_section_store(struct device *dev,
struct device_attribute *mattr,
const char *data, size_t count)
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
int ret;
ret = kstrtoul(data, 10, &value);
if (ret < 0)
return ret;
if (value > 3) {
amd64_warn("%s: invalid section 0x%lx\n", __func__, value);
return -EINVAL;
}
pvt->injection.section = (u32) value;
return count;
}
static ssize_t amd64_inject_word_show(struct device *dev,
struct device_attribute *mattr,
char *buf)
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
return sprintf(buf, "0x%x\n", pvt->injection.word);
}
/*
* store error injection word value which refers to one of 9 16-bit word of the
* 16-byte (128-bit + ECC bits) section
*
* range: 0..8
*/
static ssize_t amd64_inject_word_store(struct device *dev,
struct device_attribute *mattr,
const char *data, size_t count)
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
int ret;
ret = kstrtoul(data, 10, &value);
if (ret < 0)
return ret;
if (value > 8) {
amd64_warn("%s: invalid word 0x%lx\n", __func__, value);
return -EINVAL;
}
pvt->injection.word = (u32) value;
return count;
}
static ssize_t amd64_inject_ecc_vector_show(struct device *dev,
struct device_attribute *mattr,
char *buf)
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
return sprintf(buf, "0x%x\n", pvt->injection.bit_map);
}
/*
* store 16 bit error injection vector which enables injecting errors to the
* corresponding bit within the error injection word above. When used during a
* DRAM ECC read, it holds the contents of the of the DRAM ECC bits.
*/
static ssize_t amd64_inject_ecc_vector_store(struct device *dev,
struct device_attribute *mattr,
const char *data, size_t count)
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
int ret;
ret = kstrtoul(data, 16, &value);
if (ret < 0)
return ret;
if (value & 0xFFFF0000) {
amd64_warn("%s: invalid EccVector: 0x%lx\n", __func__, value);
return -EINVAL;
}
pvt->injection.bit_map = (u32) value;
return count;
}
/*
* Do a DRAM ECC read. Assemble staged values in the pvt area, format into
* fields needed by the injection registers and read the NB Array Data Port.
*/
static ssize_t amd64_inject_read_store(struct device *dev,
struct device_attribute *mattr,
const char *data, size_t count)
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
unsigned long value;
u32 section, word_bits;
int ret;
ret = kstrtoul(data, 10, &value);
if (ret < 0)
return ret;
/* Form value to choose 16-byte section of cacheline */
section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection);
/* Issue 'word' and 'bit' along with the READ request */
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
return count;
}
/*
* Do a DRAM ECC write. Assemble staged values in the pvt area and format into
* fields needed by the injection registers.
*/
static ssize_t amd64_inject_write_store(struct device *dev,
struct device_attribute *mattr,
const char *data, size_t count)
{
struct mem_ctl_info *mci = to_mci(dev);
struct amd64_pvt *pvt = mci->pvt_info;
u32 section, word_bits, tmp;
unsigned long value;
int ret;
ret = kstrtoul(data, 10, &value);
if (ret < 0)
return ret;
/* Form value to choose 16-byte section of cacheline */
section = F10_NB_ARRAY_DRAM | SET_NB_ARRAY_ADDR(pvt->injection.section);
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_ADDR, section);
word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection);
pr_notice_once("Don't forget to decrease MCE polling interval in\n"
"/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n"
"so that you can get the error report faster.\n");
on_each_cpu(disable_caches, NULL, 1);
/* Issue 'word' and 'bit' along with the READ request */
amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
retry:
/* wait until injection happens */
amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp);
if (tmp & F10_NB_ARR_ECC_WR_REQ) {
cpu_relax();
goto retry;
}
on_each_cpu(enable_caches, NULL, 1);
edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits);
return count;
}
/*
* update NUM_INJ_ATTRS in case you add new members
*/
static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR,
amd64_inject_section_show, amd64_inject_section_store);
static DEVICE_ATTR(inject_word, S_IRUGO | S_IWUSR,
amd64_inject_word_show, amd64_inject_word_store);
static DEVICE_ATTR(inject_ecc_vector, S_IRUGO | S_IWUSR,
amd64_inject_ecc_vector_show, amd64_inject_ecc_vector_store);
static DEVICE_ATTR(inject_write, S_IWUSR,
NULL, amd64_inject_write_store);
static DEVICE_ATTR(inject_read, S_IWUSR,
NULL, amd64_inject_read_store);
static struct attribute *amd64_edac_inj_attrs[] = {
&dev_attr_inject_section.attr,
&dev_attr_inject_word.attr,
&dev_attr_inject_ecc_vector.attr,
&dev_attr_inject_write.attr,
&dev_attr_inject_read.attr,
NULL
};
static umode_t amd64_edac_inj_is_visible(struct kobject *kobj,
struct attribute *attr, int idx)
{
struct device *dev = kobj_to_dev(kobj);
struct mem_ctl_info *mci = container_of(dev, struct mem_ctl_info, dev);
struct amd64_pvt *pvt = mci->pvt_info;
if (pvt->fam < 0x10)
return 0;
return attr->mode;
}
const struct attribute_group amd64_edac_inj_group = {
.attrs = amd64_edac_inj_attrs,
.is_visible = amd64_edac_inj_is_visible,
};
...@@ -1058,7 +1058,7 @@ static int ppc4xx_edac_mc_init(struct mem_ctl_info *mci, ...@@ -1058,7 +1058,7 @@ static int ppc4xx_edac_mc_init(struct mem_ctl_info *mci,
/* Initialize strings */ /* Initialize strings */
mci->mod_name = PPC4XX_EDAC_MODULE_NAME; mci->mod_name = PPC4XX_EDAC_MODULE_NAME;
mci->ctl_name = ppc4xx_edac_match->compatible, mci->ctl_name = ppc4xx_edac_match->compatible;
mci->dev_name = np->full_name; mci->dev_name = np->full_name;
/* Initialize callbacks */ /* Initialize callbacks */
......
...@@ -1916,7 +1916,7 @@ static int xgene_edac_probe(struct platform_device *pdev) ...@@ -1916,7 +1916,7 @@ static int xgene_edac_probe(struct platform_device *pdev)
int i; int i;
for (i = 0; i < 3; i++) { for (i = 0; i < 3; i++) {
irq = platform_get_irq(pdev, i); irq = platform_get_irq_optional(pdev, i);
if (irq < 0) { if (irq < 0) {
dev_err(&pdev->dev, "No IRQ resource\n"); dev_err(&pdev->dev, "No IRQ resource\n");
rc = -EINVAL; rc = -EINVAL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment