Commit bf767625 authored by Linus Torvalds

Merge tag 'edac_updates_for_v6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras

Pull EDAC updates from Borislav Petkov:

 - Add support for Skylake-S CPUs to ie31200_edac

 - Improve error decoding speed of the Intel drivers by doing the
   decoding in the driver itself instead of through the slower ACPI
   facilities (a minimal sketch of this decode order follows the
   commit log below)

 - Other misc improvements to the Intel drivers

 - The usual cleanups and fixlets all over EDAC land

* tag 'edac_updates_for_v6.1' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
  EDAC/i7300: Correct the i7300_exit() function name in comment
  x86/sb_edac: Add row column translation for Broadwell
  EDAC/i10nm: Print an extra register set of retry_rd_err_log
  EDAC/i10nm: Retrieve and print retry_rd_err_log registers for HBM
  EDAC/skx_common: Add ChipSelect ADXL component
  EDAC/ppc_4xx: Reorder symbols to get rid of a few forward declarations
  EDAC: Remove obsolete declarations in edac_module.h
  EDAC/i10nm: Add driver decoder for Ice Lake and Tremont CPUs
  EDAC/skx_common: Make output format similar
  EDAC/skx_common: Use driver decoder first
  EDAC/mc: Drop duplicated dimm->nr_pages debug printout
  EDAC/mc: Replace spaces with tabs in memtype flags definition
  EDAC/wq: Remove unneeded flush_workqueue()
  EDAC/ie31200: Add Skylake-S support
parents 725737e7 c2577956
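The second bullet above is the main structural change: the skx-family drivers now try their own register-based decoder before falling back to the firmware ADXL/ACPI-DSM path (see the skx_mce_check_error() hunk further down). A minimal standalone sketch of that priority logic, using hypothetical decode_in_driver()/decode_via_firmware() helpers in place of the real driver_decode()/skx_adxl_decode() hooks, would look roughly like this:

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for struct decoded_addr in skx_common.h. */
struct decoded_addr {
	unsigned long long addr;
	int socket, imc, channel, dimm;
};

/* Hypothetical helpers: the fast path reads memory-controller registers
 * directly, the slow path round-trips through a firmware ACPI DSM call. */
static bool decode_in_driver(struct decoded_addr *res)    { return false; }
static bool decode_via_firmware(struct decoded_addr *res) { return true; }

static bool decode(struct decoded_addr *res)
{
	/* Try the driver's own decoder first ... */
	if (decode_in_driver(res))
		return true;
	/* ... and only fall back to the firmware decoder if that fails. */
	return decode_via_firmware(res);
}

int main(void)
{
	struct decoded_addr res = { .addr = 0x12345000ULL };

	printf("decoded: %s\n", decode(&res) ? "yes" : "no");
	return 0;
}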
@@ -42,6 +42,7 @@
 #define MCI_STATUS_CEC_SHIFT 38 /* Corrected Error Count */
 #define MCI_STATUS_CEC_MASK GENMASK_ULL(52,38)
 #define MCI_STATUS_CEC(c) (((c) & MCI_STATUS_CEC_MASK) >> MCI_STATUS_CEC_SHIFT)
+#define MCI_STATUS_MSCOD(m) (((m) >> 16) & 0xffff)

 /* AMD-specific bits */
 #define MCI_STATUS_TCC BIT_ULL(55) /* Task context corrupt */
@@ -103,7 +103,6 @@ static void edac_mc_dump_dimm(struct dimm_info *dimm)
 	edac_dbg(4, " dimm->label = '%s'\n", dimm->label);
 	edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages);
 	edac_dbg(4, " dimm->grain = %d\n", dimm->grain);
-	edac_dbg(4, " dimm->nr_pages = 0x%x\n", dimm->nr_pages);
 }

 static void edac_mc_dump_csrow(struct csrow_info *csrow)
@@ -28,13 +28,9 @@ void edac_mc_sysfs_exit(void);
 extern int edac_create_sysfs_mci_device(struct mem_ctl_info *mci,
 					const struct attribute_group **groups);
 extern void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci);
-extern int edac_get_log_ue(void);
-extern int edac_get_log_ce(void);
-extern int edac_get_panic_on_ue(void);
 extern int edac_mc_get_log_ue(void);
 extern int edac_mc_get_log_ce(void);
 extern int edac_mc_get_panic_on_ue(void);
-extern int edac_get_poll_msec(void);
 extern unsigned int edac_mc_get_poll_msec(void);
 unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
@@ -1193,7 +1193,7 @@ static int __init i7300_init(void)
 }

 /**
- * i7300_init() - Unregisters the driver
+ * i7300_exit() - Unregisters the driver
  */
 static void __exit i7300_exit(void)
 {
@@ -20,11 +20,15 @@
  * 0c08: Xeon E3-1200 v3 Processor DRAM Controller
  * 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers
  * 5918: Xeon E3-1200 Xeon E3-1200 v6/7th Gen Core Processor Host Bridge/DRAM Registers
+ * 190f: 6th Gen Core Dual-Core Processor Host Bridge/DRAM Registers
+ * 191f: 6th Gen Core Quad-Core Processor Host Bridge/DRAM Registers
  * 3e..: 8th/9th Gen Core Processor Host Bridge/DRAM Registers
  *
  * Based on Intel specification:
  * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf
  * http://www.intel.com/content/www/us/en/processors/xeon/xeon-e3-1200-family-vol-2-datasheet.html
+ * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/desktop-6th-gen-core-family-datasheet-vol-2.pdf
+ * https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v6-vol-2-datasheet.pdf
  * https://www.intel.com/content/www/us/en/processors/core/7th-gen-core-family-mobile-h-processor-lines-datasheet-vol-2.html
  * https://www.intel.com/content/www/us/en/products/docs/processors/core/8th-gen-core-family-datasheet-vol-2.html
  *
@@ -53,15 +57,17 @@
 #define ie31200_printk(level, fmt, arg...) \
 	edac_printk(level, "ie31200", fmt, ##arg)

 #define PCI_DEVICE_ID_INTEL_IE31200_HB_1 0x0108
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_2 0x010c
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_3 0x0150
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_4 0x0158
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_5 0x015c
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08
-#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x1918
-#define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x5918
+#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x190F
+#define PCI_DEVICE_ID_INTEL_IE31200_HB_9 0x1918
+#define PCI_DEVICE_ID_INTEL_IE31200_HB_10 0x191F
+#define PCI_DEVICE_ID_INTEL_IE31200_HB_11 0x5918

 /* Coffee Lake-S */
 #define PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK 0x3e00
@@ -80,6 +86,8 @@
 #define DEVICE_ID_SKYLAKE_OR_LATER(did) \
 	(((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_8) || \
 	 ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_9) || \
+	 ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_10) || \
+	 ((did) == PCI_DEVICE_ID_INTEL_IE31200_HB_11) || \
 	 (((did) & PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK) == \
 	  PCI_DEVICE_ID_INTEL_IE31200_HB_CFL_MASK))
@@ -577,6 +585,8 @@ static const struct pci_device_id ie31200_pci_tbl[] = {
 	{ PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
 	{ PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
 	{ PCI_VEND_DEV(INTEL, IE31200_HB_9), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
+	{ PCI_VEND_DEV(INTEL, IE31200_HB_10), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
+	{ PCI_VEND_DEV(INTEL, IE31200_HB_11), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
 	{ PCI_VEND_DEV(INTEL, IE31200_HB_CFL_1), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
 	{ PCI_VEND_DEV(INTEL, IE31200_HB_CFL_2), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
 	{ PCI_VEND_DEV(INTEL, IE31200_HB_CFL_3), PCI_ANY_ID, PCI_ANY_ID, 0, 0, IE31200 },
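For orientation, the DEVICE_ID_SKYLAKE_OR_LATER() check above accepts the new Skylake-S host bridges either by exact ID or via the Coffee Lake-S mask. A small standalone sketch (macro names shortened here; the ID values are copied from the hunk) shows which device IDs match:

#include <stdbool.h>
#include <stdio.h>

#define HB_8  0x190F  /* Skylake-S dual core        */
#define HB_9  0x1918  /* Xeon E3-1200 v5            */
#define HB_10 0x191F  /* Skylake-S quad core        */
#define HB_11 0x5918  /* Xeon E3-1200 v6 / 7th gen  */
#define CFL_MASK 0x3e00

static bool skylake_or_later(unsigned int did)
{
	return did == HB_8 || did == HB_9 || did == HB_10 || did == HB_11 ||
	       (did & CFL_MASK) == CFL_MASK;
}

int main(void)
{
	/* 0x190f/0x191f match directly; 0x3e1f matches through the mask; 0x0108 does not. */
	printf("%d %d %d %d\n", skylake_or_later(0x190F), skylake_or_later(0x191F),
	       skylake_or_later(0x3e1f), skylake_or_later(0x0108));
	return 0;
}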
@@ -178,11 +178,6 @@ struct ppc4xx_ecc_status {
 	u32 wmirq;
 };

-/* Function Prototypes */
-
-static int ppc4xx_edac_probe(struct platform_device *device);
-static int ppc4xx_edac_remove(struct platform_device *device);
-
 /* Global Variables */

 /*
@@ -197,15 +192,6 @@ static const struct of_device_id ppc4xx_edac_match[] = {
 };
 MODULE_DEVICE_TABLE(of, ppc4xx_edac_match);

-static struct platform_driver ppc4xx_edac_driver = {
-	.probe = ppc4xx_edac_probe,
-	.remove = ppc4xx_edac_remove,
-	.driver = {
-		.name = PPC4XX_EDAC_MODULE_NAME,
-		.of_match_table = ppc4xx_edac_match,
-	},
-};
-
 /*
  * TODO: The row and channel parameters likely need to be dynamically
  * set based on the aforementioned variant controller realizations.
@@ -1391,6 +1377,15 @@ ppc4xx_edac_opstate_init(void)
 			  EDAC_OPSTATE_UNKNOWN_STR)));
 }

+static struct platform_driver ppc4xx_edac_driver = {
+	.probe = ppc4xx_edac_probe,
+	.remove = ppc4xx_edac_remove,
+	.driver = {
+		.name = PPC4XX_EDAC_MODULE_NAME,
+		.of_match_table = ppc4xx_edac_match,
+	},
+};
+
 /**
  * ppc4xx_edac_init - driver/module insertion entry point
  *
@@ -335,6 +335,12 @@ struct sbridge_info {
 struct sbridge_channel {
 	u32 ranks;
 	u32 dimms;
+	struct dimm {
+		u32 rowbits;
+		u32 colbits;
+		u32 bank_xor_enable;
+		u32 amap_fine;
+	} dimm[MAX_DIMMS];
 };

 struct pci_id_descr {
@@ -1603,7 +1609,7 @@ static int __populate_dimms(struct mem_ctl_info *mci,
 		banks = 8;

 	for (i = 0; i < channels; i++) {
-		u32 mtr;
+		u32 mtr, amap = 0;
 		int max_dimms_per_channel;
@@ -1615,6 +1621,7 @@ static int __populate_dimms(struct mem_ctl_info *mci,
 			max_dimms_per_channel = ARRAY_SIZE(mtr_regs);
 			if (!pvt->pci_tad[i])
 				continue;
+			pci_read_config_dword(pvt->pci_tad[i], 0x8c, &amap);
 		}

 		for (j = 0; j < max_dimms_per_channel; j++) {
@@ -1627,6 +1634,7 @@ static int __populate_dimms(struct mem_ctl_info *mci,
 					      mtr_regs[j], &mtr);
 		}
 		edac_dbg(4, "Channel #%d MTR%d = %x\n", i, j, mtr);
+
 		if (IS_DIMM_PRESENT(mtr)) {
 			if (!IS_ECC_ENABLED(pvt->info.mcmtr)) {
 				sbridge_printk(KERN_ERR, "CPU SrcID #%d, Ha #%d, Channel #%d has DIMMs, but ECC is disabled\n",
@@ -1661,6 +1669,11 @@ static int __populate_dimms(struct mem_ctl_info *mci,
 			dimm->dtype = pvt->info.get_width(pvt, mtr);
 			dimm->mtype = mtype;
 			dimm->edac_mode = mode;
+			pvt->channel[i].dimm[j].rowbits = order_base_2(rows);
+			pvt->channel[i].dimm[j].colbits = order_base_2(cols);
+			pvt->channel[i].dimm[j].bank_xor_enable =
+					GET_BITFIELD(pvt->info.mcmtr, 9, 9);
+			pvt->channel[i].dimm[j].amap_fine = GET_BITFIELD(amap, 0, 0);
 			snprintf(dimm->label, sizeof(dimm->label),
 				 "CPU_SrcID#%u_Ha#%u_Chan#%u_DIMM#%u",
 				 pvt->sbridge_dev->source_id, pvt->sbridge_dev->dom, i, j);
@@ -1922,6 +1935,99 @@ static struct mem_ctl_info *get_mci_for_node_id(u8 node_id, u8 ha)
 	return NULL;
 }

+static u8 sb_close_row[] = {
+	15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33
+};
+
+static u8 sb_close_column[] = {
+	3, 4, 5, 14, 19, 23, 24, 25, 26, 27
+};
+
+static u8 sb_open_row[] = {
+	14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33
+};
+
+static u8 sb_open_column[] = {
+	3, 4, 5, 6, 7, 8, 9, 10, 11, 12
+};
+
+static u8 sb_open_fine_column[] = {
+	3, 4, 5, 7, 8, 9, 10, 11, 12, 13
+};
+
+static int sb_bits(u64 addr, int nbits, u8 *bits)
+{
+	int i, res = 0;
+
+	for (i = 0; i < nbits; i++)
+		res |= ((addr >> bits[i]) & 1) << i;
+	return res;
+}
+
+static int sb_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1)
+{
+	int ret = GET_BITFIELD(addr, b0, b0) | (GET_BITFIELD(addr, b1, b1) << 1);
+
+	if (do_xor)
+		ret ^= GET_BITFIELD(addr, x0, x0) | (GET_BITFIELD(addr, x1, x1) << 1);
+
+	return ret;
+}
+
+static bool sb_decode_ddr4(struct mem_ctl_info *mci, int ch, u8 rank,
+			   u64 rank_addr, char *msg)
+{
+	int dimmno = 0;
+	int row, col, bank_address, bank_group;
+	struct sbridge_pvt *pvt;
+	u32 bg0 = 0, rowbits = 0, colbits = 0;
+	u32 amap_fine = 0, bank_xor_enable = 0;
+
+	dimmno = (rank < 12) ? rank / 4 : 2;
+	pvt = mci->pvt_info;
+	amap_fine = pvt->channel[ch].dimm[dimmno].amap_fine;
+	bg0 = amap_fine ? 6 : 13;
+	rowbits = pvt->channel[ch].dimm[dimmno].rowbits;
+	colbits = pvt->channel[ch].dimm[dimmno].colbits;
+	bank_xor_enable = pvt->channel[ch].dimm[dimmno].bank_xor_enable;
+
+	if (pvt->is_lockstep) {
+		pr_warn_once("LockStep row/column decode is not supported yet!\n");
+		msg[0] = '\0';
+		return false;
+	}
+
+	if (pvt->is_close_pg) {
+		row = sb_bits(rank_addr, rowbits, sb_close_row);
+		col = sb_bits(rank_addr, colbits, sb_close_column);
+		col |= 0x400; /* C10 is autoprecharge, always set */
+		bank_address = sb_bank_bits(rank_addr, 8, 9, bank_xor_enable, 22, 28);
+		bank_group = sb_bank_bits(rank_addr, 6, 7, bank_xor_enable, 20, 21);
+	} else {
+		row = sb_bits(rank_addr, rowbits, sb_open_row);
+		if (amap_fine)
+			col = sb_bits(rank_addr, colbits, sb_open_fine_column);
+		else
+			col = sb_bits(rank_addr, colbits, sb_open_column);
+		bank_address = sb_bank_bits(rank_addr, 18, 19, bank_xor_enable, 22, 23);
+		bank_group = sb_bank_bits(rank_addr, bg0, 17, bank_xor_enable, 20, 21);
+	}
+
+	row &= (1u << rowbits) - 1;
+
+	sprintf(msg, "row:0x%x col:0x%x bank_addr:%d bank_group:%d",
+		row, col, bank_address, bank_group);
+	return true;
+}
+
+static bool sb_decode_ddr3(struct mem_ctl_info *mci, int ch, u8 rank,
+			   u64 rank_addr, char *msg)
+{
+	pr_warn_once("DDR3 row/column decode not support yet!\n");
+	msg[0] = '\0';
+	return false;
+}
+
 static int get_memory_error_data(struct mem_ctl_info *mci,
 				 u64 addr,
 				 u8 *socket, u8 *ha,
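A quick aside on the decode helpers above: sb_bits() simply gathers scattered rank-address bits, in the order given by the sb_*_row/column tables, into a contiguous row or column index. The following standalone sketch reuses the close-page row table from the hunk and can be built outside the kernel; the example address is invented purely for illustration:

#include <stdio.h>
#include <stdint.h>

/* Close-page row bit positions, copied from the hunk above. */
static uint8_t sb_close_row[] = {
	15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33
};

/* Same logic as the kernel's sb_bits(): bit i of the result is
 * taken from bit bits[i] of the rank address. */
static int sb_bits(uint64_t addr, int nbits, const uint8_t *bits)
{
	int i, res = 0;

	for (i = 0; i < nbits; i++)
		res |= ((addr >> bits[i]) & 1) << i;
	return res;
}

int main(void)
{
	/* A rank address with bits 15 and 17 set maps to row bits 0 and 2,
	 * i.e. row 0x5, assuming a 17-bit row. */
	uint64_t rank_addr = (1ULL << 15) | (1ULL << 17);

	printf("row = 0x%x\n", sb_bits(rank_addr, 17, sb_close_row));
	return 0;
}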
@@ -1937,12 +2043,13 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	int interleave_mode, shiftup = 0;
 	unsigned int sad_interleave[MAX_INTERLEAVE];
 	u32 reg, dram_rule;
-	u8 ch_way, sck_way, pkg, sad_ha = 0;
+	u8 ch_way, sck_way, pkg, sad_ha = 0, rankid = 0;
 	u32 tad_offset;
 	u32 rir_way;
 	u32 mb, gb;
 	u64 ch_addr, offset, limit = 0, prv = 0;
+	u64 rank_addr;
+	enum mem_type mtype;

 	/*
 	 * Step 0) Check if the address is at special memory ranges
@@ -2226,6 +2333,28 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	pci_read_config_dword(pvt->pci_tad[base_ch], rir_offset[n_rir][idx], &reg);
 	*rank = RIR_RNK_TGT(pvt->info.type, reg);

+	if (pvt->info.type == BROADWELL) {
+		if (pvt->is_close_pg)
+			shiftup = 6;
+		else
+			shiftup = 13;
+
+		rank_addr = ch_addr >> shiftup;
+		rank_addr /= (1 << rir_way);
+		rank_addr <<= shiftup;
+		rank_addr |= ch_addr & GENMASK_ULL(shiftup - 1, 0);
+		rank_addr -= RIR_OFFSET(pvt->info.type, reg);
+
+		mtype = pvt->info.get_memory_type(pvt);
+		rankid = *rank;
+		if (mtype == MEM_DDR4 || mtype == MEM_RDDR4)
+			sb_decode_ddr4(mci, base_ch, rankid, rank_addr, msg);
+		else
+			sb_decode_ddr3(mci, base_ch, rankid, rank_addr, msg);
+	} else {
+		msg[0] = '\0';
+	}
+
 	edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
 		 n_rir,
 		 ch_addr,
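The Broadwell branch above strips the RIR interleave out of the channel address: shift down by the page-policy granularity (6 bits for close page, 13 for open page), divide by the interleave ways, shift back up, splice the low bits back in, and subtract the RIR offset. A hedged numeric walk-through, with all values invented for illustration:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Illustrative values only: close page (shiftup = 6), a 2-way RIR
	 * interleave (rir_way = 1), channel address 0x12345, RIR offset 0. */
	int shiftup = 6, rir_way = 1;
	uint64_t ch_addr = 0x12345, rir_offset = 0, rank_addr;

	rank_addr  = ch_addr >> shiftup;                 /* 0x48d   */
	rank_addr /= (1 << rir_way);                     /* 0x246   */
	rank_addr <<= shiftup;                           /* 0x9180  */
	rank_addr |= ch_addr & ((1ULL << shiftup) - 1);  /* | 0x5 -> 0x9185 */
	rank_addr -= rir_offset;

	printf("rank_addr = 0x%llx\n", (unsigned long long)rank_addr);
	return 0;
}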
@@ -2950,7 +3079,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
 	struct mem_ctl_info *new_mci;
 	struct sbridge_pvt *pvt = mci->pvt_info;
 	enum hw_event_mc_err_type tp_event;
-	char *optype, msg[256];
+	char *optype, msg[256], msg_full[512];
 	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
 	bool overflow = GET_BITFIELD(m->status, 62, 62);
 	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@@ -3089,18 +3218,17 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
 	 */
 	if (!pvt->is_lockstep && !pvt->is_cur_addr_mirrored && !pvt->is_close_pg)
 		channel = first_channel;
-
-	snprintf(msg, sizeof(msg),
-		 "%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d",
+	snprintf(msg_full, sizeof(msg_full),
+		 "%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d %s",
 		 overflow ? " OVERFLOW" : "",
 		 (uncorrected_error && recoverable) ? " recoverable" : "",
 		 area_type,
 		 mscod, errcode,
 		 socket, ha,
 		 channel_mask,
-		 rank);
+		 rank, msg);

-	edac_dbg(0, "%s\n", msg);
+	edac_dbg(0, "%s\n", msg_full);

 	/* FIXME: need support for channel mask */
@@ -3111,7 +3239,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci,
 	edac_mc_handle_error(tp_event, mci, core_err_cnt,
 			     m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
 			     channel, dimm, -1,
-			     optype, msg);
+			     optype, msg_full);
 	return;
 err_parsing:
 	edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0,
@@ -714,8 +714,13 @@ static int __init skx_init(void)
 	skx_set_decode(skx_decode, skx_show_retry_rd_err_log);

-	if (nvdimm_count && skx_adxl_get() == -ENODEV)
-		skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n");
+	if (nvdimm_count && skx_adxl_get() != -ENODEV) {
+		skx_set_decode(NULL, skx_show_retry_rd_err_log);
+	} else {
+		if (nvdimm_count)
+			skx_printk(KERN_NOTICE, "Only decoding DDR4 address!\n");
+
+		skx_set_decode(skx_decode, skx_show_retry_rd_err_log);
+	}

 	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
 	opstate_init();
@@ -27,9 +27,11 @@ static const char * const component_names[] = {
 	[INDEX_MEMCTRL] = "MemoryControllerId",
 	[INDEX_CHANNEL] = "ChannelId",
 	[INDEX_DIMM] = "DimmSlotId",
+	[INDEX_CS] = "ChipSelect",
 	[INDEX_NM_MEMCTRL] = "NmMemoryControllerId",
 	[INDEX_NM_CHANNEL] = "NmChannelId",
 	[INDEX_NM_DIMM] = "NmDimmSlotId",
+	[INDEX_NM_CS] = "NmChipSelect",
 };

 static int component_indices[ARRAY_SIZE(component_names)];
@@ -40,7 +42,7 @@ static char *adxl_msg;
 static unsigned long adxl_nm_bitmap;

 static char skx_msg[MSG_SIZE];
-static skx_decode_f skx_decode;
+static skx_decode_f driver_decode;
 static skx_show_retry_log_f skx_show_retry_rd_err_log;
 static u64 skx_tolm, skx_tohm;
 static LIST_HEAD(dev_edac_list);
@@ -139,10 +141,13 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me
 			       (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1;
 		res->dimm = (adxl_nm_bitmap & BIT_NM_DIMM) ?
 			    (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1;
+		res->cs = (adxl_nm_bitmap & BIT_NM_CS) ?
+			  (int)adxl_values[component_indices[INDEX_NM_CS]] : -1;
 	} else {
 		res->imc = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
 		res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
 		res->dimm = (int)adxl_values[component_indices[INDEX_DIMM]];
+		res->cs = (int)adxl_values[component_indices[INDEX_CS]];
 	}

 	if (res->imc > NUM_IMC - 1 || res->imc < 0) {
@@ -173,6 +178,8 @@ static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_me
 			break;
 		}
 	}

+	res->decoded_by_adxl = true;
+
 	return true;
 }
@@ -183,7 +190,7 @@ void skx_set_mem_cfg(bool mem_cfg_2lm)

 void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
 {
-	skx_decode = decode;
+	driver_decode = decode;
 	skx_show_retry_rd_err_log = show_retry_log;
 }
@@ -591,19 +598,19 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
 			break;
 		}
 	}
-	if (adxl_component_count) {
+	if (res->decoded_by_adxl) {
 		len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
 			       overflow ? " OVERFLOW" : "",
 			       (uncorrected_error && recoverable) ? " recoverable" : "",
 			       mscod, errcode, adxl_msg);
 	} else {
 		len = snprintf(skx_msg, MSG_SIZE,
-			       "%s%s err_code:0x%04x:0x%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:0x%x col:0x%x",
+			       "%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x",
 			       overflow ? " OVERFLOW" : "",
 			       (uncorrected_error && recoverable) ? " recoverable" : "",
 			       mscod, errcode,
 			       res->socket, res->imc, res->rank,
-			       res->bank_group, res->bank_address, res->row, res->column);
+			       res->row, res->column, res->bank_address, res->bank_group);
 	}

 	if (skx_show_retry_rd_err_log)
@@ -649,13 +656,14 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
 		return NOTIFY_DONE;

 	memset(&res, 0, sizeof(res));
+	res.mce = mce;
 	res.addr = mce->addr;

-	if (adxl_component_count) {
-		if (!skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce)))
+	/* Try driver decoder first */
+	if (!(driver_decode && driver_decode(&res))) {
+		/* Then try firmware decoder (ACPI DSM methods) */
+		if (!(adxl_component_count && skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce))))
 			return NOTIFY_DONE;
-	} else if (!skx_decode || !skx_decode(&res)) {
-		return NOTIFY_DONE;
 	}

 	mci = res.dev->imc[res.imc].mci;
@@ -10,6 +10,7 @@
 #define _SKX_COMM_EDAC_H

 #include <linux/bits.h>
+#include <asm/mce.h>

 #define MSG_SIZE 1024
@@ -52,6 +53,9 @@
 #define IS_DIMM_PRESENT(r) GET_BITFIELD(r, 15, 15)
 #define IS_NVDIMM_PRESENT(r, i) GET_BITFIELD(r, i, i)

+#define MCI_MISC_ECC_MODE(m) (((m) >> 59) & 15)
+#define MCI_MISC_ECC_DDRT 8 /* read from DDRT */
+
 /*
  * Each cpu socket contains some pci devices that provide global
  * information, and also some that are local to each of the two
@@ -82,6 +86,7 @@ struct skx_dev {
 		struct pci_dev *edev;
 		u32 retry_rd_err_log_s;
 		u32 retry_rd_err_log_d;
+		u32 retry_rd_err_log_d2;
 		struct skx_dimm {
 			u8 close_pg;
 			u8 bank_xor_enable;
@@ -108,18 +113,22 @@ enum {
 	INDEX_MEMCTRL,
 	INDEX_CHANNEL,
 	INDEX_DIMM,
+	INDEX_CS,
 	INDEX_NM_FIRST,
 	INDEX_NM_MEMCTRL = INDEX_NM_FIRST,
 	INDEX_NM_CHANNEL,
 	INDEX_NM_DIMM,
+	INDEX_NM_CS,
 	INDEX_MAX
 };

 #define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL)
 #define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL)
 #define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM)
+#define BIT_NM_CS BIT_ULL(INDEX_NM_CS)

 struct decoded_addr {
+	struct mce *mce;
 	struct skx_dev *dev;
 	u64 addr;
 	int socket;
@@ -129,6 +138,7 @@ struct decoded_addr {
 	int sktways;
 	int chanways;
 	int dimm;
+	int cs;
 	int rank;
 	int channel_rank;
 	u64 rank_address;
@@ -136,6 +146,7 @@ struct decoded_addr {
 	int column;
 	int bank_address;
 	int bank_group;
+	bool decoded_by_adxl;
 };

 struct res_config {
@@ -154,7 +165,12 @@ struct res_config {
 	int sad_all_offset;
 	/* Offsets of retry_rd_err_log registers */
 	u32 *offsets_scrub;
+	u32 *offsets_scrub_hbm0;
+	u32 *offsets_scrub_hbm1;
 	u32 *offsets_demand;
+	u32 *offsets_demand2;
+	u32 *offsets_demand_hbm0;
+	u32 *offsets_demand_hbm1;
 };

 typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
@@ -37,7 +37,6 @@ int edac_workqueue_setup(void)

 void edac_workqueue_teardown(void)
 {
-	flush_workqueue(wq);
 	destroy_workqueue(wq);
 	wq = NULL;
 }
@@ -231,21 +231,21 @@ enum mem_type {
 #define MEM_FLAG_DDR BIT(MEM_DDR)
 #define MEM_FLAG_RDDR BIT(MEM_RDDR)
 #define MEM_FLAG_RMBS BIT(MEM_RMBS)
 #define MEM_FLAG_DDR2 BIT(MEM_DDR2)
 #define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2)
 #define MEM_FLAG_RDDR2 BIT(MEM_RDDR2)
 #define MEM_FLAG_XDR BIT(MEM_XDR)
 #define MEM_FLAG_DDR3 BIT(MEM_DDR3)
 #define MEM_FLAG_RDDR3 BIT(MEM_RDDR3)
 #define MEM_FLAG_LPDDR3 BIT(MEM_LPDDR3)
 #define MEM_FLAG_DDR4 BIT(MEM_DDR4)
 #define MEM_FLAG_RDDR4 BIT(MEM_RDDR4)
 #define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4)
 #define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4)
 #define MEM_FLAG_DDR5 BIT(MEM_DDR5)
 #define MEM_FLAG_RDDR5 BIT(MEM_RDDR5)
 #define MEM_FLAG_LRDDR5 BIT(MEM_LRDDR5)
 #define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM)
 #define MEM_FLAG_WIO2 BIT(MEM_WIO2)
 #define MEM_FLAG_HBM2 BIT(MEM_HBM2)