Commit ef708b53 authored by Mauro Carvalho Chehab's avatar Mauro Carvalho Chehab

i7core_edac: Add additional tests for error detection

Properly check the number of channels and improve probing error detection
Signed-off-by: default avatarMauro Carvalho Chehab <mchehab@redhat.com>
parent 442305b1
...@@ -189,6 +189,7 @@ struct i7core_pvt { ...@@ -189,6 +189,7 @@ struct i7core_pvt {
struct i7core_info info; struct i7core_info info;
struct i7core_inject inject; struct i7core_inject inject;
struct i7core_channel channel[NUM_CHANS]; struct i7core_channel channel[NUM_CHANS];
int channels; /* Number of active channels */
int ce_count_available; int ce_count_available;
unsigned long ce_count[MAX_DIMMS]; /* ECC corrected errors counts per dimm */ unsigned long ce_count[MAX_DIMMS]; /* ECC corrected errors counts per dimm */
...@@ -259,12 +260,12 @@ static struct edac_pci_ctl_info *i7core_pci; ...@@ -259,12 +260,12 @@ static struct edac_pci_ctl_info *i7core_pci;
****************************************************************************/ ****************************************************************************/
/* MC_CONTROL bits */ /* MC_CONTROL bits */
#define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & 1 << (8 + ch)) #define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
#define ECCx8(pvt) ((pvt)->info.mc_control & 1 << 1) #define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
/* MC_STATUS bits */ /* MC_STATUS bits */
#define ECC_ENABLED(pvt) ((pvt)->info.mc_status & 1 << 3) #define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 3))
#define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & 1 << ch) #define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
/* MC_MAX_DOD read functions */ /* MC_MAX_DOD read functions */
static inline int maxnumdimms(struct i7core_pvt *pvt) static inline int maxnumdimms(struct i7core_pvt *pvt)
...@@ -308,6 +309,48 @@ static inline int maxnumcol(struct i7core_pvt *pvt) ...@@ -308,6 +309,48 @@ static inline int maxnumcol(struct i7core_pvt *pvt)
/**************************************************************************** /****************************************************************************
Memory check routines Memory check routines
****************************************************************************/ ****************************************************************************/
static int i7core_get_active_channels(int *channels)
{
struct pci_dev *pdev = NULL;
int i;
u32 status, control;
*channels = 0;
for (i = 0; i < N_DEVS; i++) {
if (!pci_devs[i].pdev)
continue;
if (PCI_SLOT(pci_devs[i].pdev->devfn) == 3 &&
PCI_FUNC(pci_devs[i].pdev->devfn) == 0) {
pdev = pci_devs[i].pdev;
break;
}
}
if (!pdev)
return -ENODEV;
/* Device 3 function 0 reads */
pci_read_config_dword(pdev, MC_STATUS, &status);
pci_read_config_dword(pdev, MC_CONTROL, &control);
for (i = 0; i < NUM_CHANS; i++) {
/* Check if the channel is active */
if (!(control & (1 << (8 + i))))
continue;
/* Check if the channel is disabled */
if (status & (1 << i)) {
continue;
}
(*channels)++;
}
return 0;
}
static int get_dimm_config(struct mem_ctl_info *mci) static int get_dimm_config(struct mem_ctl_info *mci)
{ {
struct i7core_pvt *pvt = mci->pvt_info; struct i7core_pvt *pvt = mci->pvt_info;
...@@ -852,69 +895,103 @@ static void i7core_put_devices(void) ...@@ -852,69 +895,103 @@ static void i7core_put_devices(void)
* *
* Need to 'get' device 16 func 1 and func 2 * Need to 'get' device 16 func 1 and func 2
*/ */
static int i7core_get_devices(struct mem_ctl_info *mci, struct pci_dev *mcidev) static int i7core_get_devices(void)
{ {
struct i7core_pvt *pvt = mci->pvt_info; int rc, i;
int rc, i,func;
struct pci_dev *pdev = NULL; struct pci_dev *pdev = NULL;
pvt = mci->pvt_info;
memset(pvt, 0, sizeof(*pvt));
for (i = 0; i < N_DEVS; i++) { for (i = 0; i < N_DEVS; i++) {
pdev = pci_get_device(PCI_VENDOR_ID_INTEL, pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
pci_devs[i].dev_id, NULL); pci_devs[i].dev_id, NULL);
if (!pdev) { if (likely(pdev))
/* End of list, leave */ pci_devs[i].pdev = pdev;
else {
i7core_printk(KERN_ERR, i7core_printk(KERN_ERR,
"Device not found: PCI ID %04x:%04x " "Device not found: PCI ID %04x:%04x "
"(dev %d, func %d)\n", "(dev %d, func %d)\n",
PCI_VENDOR_ID_INTEL, pci_devs[i].dev_id, PCI_VENDOR_ID_INTEL, pci_devs[i].dev_id,
pci_devs[i].dev,pci_devs[i].func); pci_devs[i].dev,pci_devs[i].func);
/* Dev 3 function 2 only exists on chips with RDIMMs */
if ((pci_devs[i].dev == 3) && (pci_devs[i].func == 2)) if ((pci_devs[i].dev == 3) && (pci_devs[i].func == 2))
continue; /* Only on chips with RDIMMs */ continue;
else
i7core_put_devices(); /* End of list, leave */
rc = -ENODEV;
goto error;
} }
pci_devs[i].pdev = pdev;
rc = pci_enable_device(pdev); /* Sanity check */
if (rc < 0) { if (unlikely(PCI_SLOT(pdev->devfn) != pci_devs[i].dev ||
PCI_FUNC(pdev->devfn) != pci_devs[i].func)) {
i7core_printk(KERN_ERR, i7core_printk(KERN_ERR,
"Couldn't enable PCI ID %04x:%04x " "Device PCI ID %04x:%04x "
"(dev %d, func %d)\n", "has fn %d.%d instead of fn %d.%d\n",
PCI_VENDOR_ID_INTEL, pci_devs[i].dev_id, PCI_VENDOR_ID_INTEL, pci_devs[i].dev_id,
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
pci_devs[i].dev, pci_devs[i].func); pci_devs[i].dev, pci_devs[i].func);
i7core_put_devices(); rc = -EINVAL;
return rc; goto error;
} }
/* Sanity check */
if (PCI_FUNC(pdev->devfn) != pci_devs[i].func) { /* Be sure that the device is enabled */
rc = pci_enable_device(pdev);
if (unlikely(rc < 0)) {
i7core_printk(KERN_ERR, i7core_printk(KERN_ERR,
"Device PCI ID %04x:%04x " "Couldn't enable PCI ID %04x:%04x "
"has function %d instead of %d\n", "fn %d.%d\n",
PCI_VENDOR_ID_INTEL, pci_devs[i].dev_id, PCI_VENDOR_ID_INTEL, pci_devs[i].dev_id,
PCI_FUNC(pdev->devfn), pci_devs[i].func); PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
i7core_put_devices(); goto error;
return -EINVAL;
} }
i7core_printk(KERN_INFO, i7core_printk(KERN_INFO,
"Registered device %0x:%0x fn=%0x %0x\n", "Registered device %0x:%0x fn %d.%d\n",
PCI_VENDOR_ID_INTEL, pci_devs[i].dev_id, PCI_VENDOR_ID_INTEL, pci_devs[i].dev_id,
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
}
return 0;
error:
i7core_put_devices();
return -EINVAL;
}
static int mci_bind_devs(struct mem_ctl_info *mci)
{
struct i7core_pvt *pvt = mci->pvt_info;
struct pci_dev *pdev;
int i, func, slot;
for (i = 0; i < N_DEVS; i++) {
pdev = pci_devs[i].pdev;
if (!pdev)
continue;
func = PCI_FUNC(pdev->devfn); func = PCI_FUNC(pdev->devfn);
if (pci_devs[i].dev < 4) { slot = PCI_SLOT(pdev->devfn);
if (slot == 3) {
if (unlikely(func > MAX_MCR_FUNC))
goto error;
pvt->pci_mcr[func] = pdev; pvt->pci_mcr[func] = pdev;
} else { } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
pvt->pci_ch[pci_devs[i].dev - 4][func] = pdev; if (unlikely(func > MAX_CHAN_FUNC))
} goto error;
pvt->pci_ch[slot - 4][func] = pdev;
} else
goto error;
debugf0("Associated fn %d.%d, dev = %p\n",
PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), pdev);
} }
i7core_printk(KERN_INFO, "Driver loaded.\n");
return 0; return 0;
error:
i7core_printk(KERN_ERR, "Device %d, function %d "
"is out of the expected range\n",
slot, func);
return -EINVAL;
} }
/**************************************************************************** /****************************************************************************
...@@ -1001,41 +1078,38 @@ static int __devinit i7core_probe(struct pci_dev *pdev, ...@@ -1001,41 +1078,38 @@ static int __devinit i7core_probe(struct pci_dev *pdev,
{ {
struct mem_ctl_info *mci; struct mem_ctl_info *mci;
struct i7core_pvt *pvt; struct i7core_pvt *pvt;
int num_channels; int num_channels = 0;
int num_csrows; int num_csrows;
int num_dimms_per_channel;
int dev_idx = id->driver_data; int dev_idx = id->driver_data;
if (dev_idx >= ARRAY_SIZE(i7core_devs)) if (unlikely(dev_idx >= ARRAY_SIZE(i7core_devs)))
return -EINVAL; return -EINVAL;
num_channels = NUM_CHANS; /* get the pci devices we want to reserve for our use */
if (unlikely(i7core_get_devices() < 0))
return -ENODEV;
/* Check the number of active and not disabled channels */
if (unlikely (i7core_get_active_channels(&num_channels)) < 0)
goto fail0;
/* FIXME: FAKE data, since we currently don't now how to get this */ /* FIXME: we currently don't know the number of csrows */
num_dimms_per_channel = 4; num_csrows = num_channels;
num_csrows = num_dimms_per_channel;
/* allocate a new MC control structure */ /* allocate a new MC control structure */
mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0); mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);
if (mci == NULL) if (unlikely (!mci))
return -ENOMEM; return -ENOMEM;
debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci); debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
/* 'get' the pci devices we want to reserve for our use */
if (i7core_get_devices(mci, pdev))
goto fail0;
mci->dev = &pdev->dev; /* record ptr to the generic device */ mci->dev = &pdev->dev; /* record ptr to the generic device */
pvt = mci->pvt_info; pvt = mci->pvt_info;
memset(pvt, 0, sizeof(*pvt));
// pvt->system_address = pdev; /* Record this device in our private */
// pvt->maxch = num_channels;
// pvt->maxdimmperch = num_dimms_per_channel;
mci->mc_idx = 0; mci->mc_idx = 0;
mci->mtype_cap = MEM_FLAG_FB_DDR2; /* FIXME: it uses DDR3 */ mci->mtype_cap = MEM_FLAG_DDR3; /* FIXME: how to handle RDDR3? */
mci->edac_ctl_cap = EDAC_FLAG_NONE; mci->edac_ctl_cap = EDAC_FLAG_NONE;
mci->edac_cap = EDAC_FLAG_NONE; mci->edac_cap = EDAC_FLAG_NONE;
mci->mod_name = "i7core_edac.c"; mci->mod_name = "i7core_edac.c";
...@@ -1044,12 +1118,18 @@ static int __devinit i7core_probe(struct pci_dev *pdev, ...@@ -1044,12 +1118,18 @@ static int __devinit i7core_probe(struct pci_dev *pdev,
mci->dev_name = pci_name(pdev); mci->dev_name = pci_name(pdev);
mci->ctl_page_to_phys = NULL; mci->ctl_page_to_phys = NULL;
mci->mc_driver_sysfs_attributes = i7core_inj_attrs; mci->mc_driver_sysfs_attributes = i7core_inj_attrs;
/* Set the function pointer to an actual operation function */ /* Set the function pointer to an actual operation function */
mci->edac_check = i7core_check_error; mci->edac_check = i7core_check_error;
/* Store pci devices at mci for faster access */
if (unlikely (mci_bind_devs(mci)) < 0)
goto fail1;
/* Get dimm basic config */
get_dimm_config(mci);
/* add this new MC control structure to EDAC's list of MCs */ /* add this new MC control structure to EDAC's list of MCs */
if (edac_mc_add_mc(mci)) { if (unlikely(edac_mc_add_mc(mci)) < 0) {
debugf0("MC: " __FILE__ debugf0("MC: " __FILE__
": %s(): failed edac_mc_add_mc()\n", __func__); ": %s(): failed edac_mc_add_mc()\n", __func__);
/* FIXME: perhaps some code should go here that disables error /* FIXME: perhaps some code should go here that disables error
...@@ -1060,7 +1140,7 @@ static int __devinit i7core_probe(struct pci_dev *pdev, ...@@ -1060,7 +1140,7 @@ static int __devinit i7core_probe(struct pci_dev *pdev,
/* allocating generic PCI control info */ /* allocating generic PCI control info */
i7core_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR); i7core_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
if (!i7core_pci) { if (unlikely (!i7core_pci)) {
printk(KERN_WARNING printk(KERN_WARNING
"%s(): Unable to create PCI control\n", "%s(): Unable to create PCI control\n",
__func__); __func__);
...@@ -1070,15 +1150,14 @@ static int __devinit i7core_probe(struct pci_dev *pdev, ...@@ -1070,15 +1150,14 @@ static int __devinit i7core_probe(struct pci_dev *pdev,
} }
/* Default error mask is any memory */ /* Default error mask is any memory */
pvt->inject.channel = -1; pvt->inject.channel = 0;
pvt->inject.dimm = -1; pvt->inject.dimm = -1;
pvt->inject.rank = -1; pvt->inject.rank = -1;
pvt->inject.bank = -1; pvt->inject.bank = -1;
pvt->inject.page = -1; pvt->inject.page = -1;
pvt->inject.col = -1; pvt->inject.col = -1;
/* Get dimm basic config */ i7core_printk(KERN_INFO, "Driver loaded.\n");
get_dimm_config(mci);
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment