Commit d2b0f6f7 authored by Gavin Shan, committed by Benjamin Herrenschmidt

powerpc/eeh: No hotplug on permanently removed dev

The issue was detected in a somewhat complicated test case where
we have multiple hierarchical PEs, as shown in the following figure:

                +-----------------+
                | PE#3     p2p#0  |
                |          p2p#1  |
                +-----------------+
                        |
                +-----------------+
                | PE#4     pdev#0 |
                |          pdev#1 |
                +-----------------+

PE#4 (which has 2 PCI devices) is the child of PE#3, which has 2 p2p
bridges. We accidentally hit a less-known scenario: PE#4 was removed
permanently from the system because of a permanent failure (e.g.
exceeding the max allowed failure times in the last hour), then we detected
EEH errors on PE#3 and tried to recover it. However, the eeh_dev instances
for pdev#0/1 were not detached from PE#4, which was still connected to
PE#3. All of that was because we rely on the count-based
pcibios_release_device(), which isn't reliable enough. When doing
recovery for PE#3, we still applied hotplug to PE#4 and pdev#0/1, which
are not valid any more. Eventually, we ran into a kernel crash.

The patch fixes the above issue from two aspects. For unplug, we simply
skip those permanently removed PEs, whose state is (EEH_PE_STATE_ISOLATED
&& !EEH_PE_STATE_RECOVERING) and whose frozen count should be greater
than EEH_MAX_ALLOWED_FREEZES. For plug, we mark all permanently
removed EEH devices with EEH_DEV_REMOVED and return 0xFF's on reads of
their PCI config space so that the PCI core will omit them.
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
parent 7f52a526
...@@ -98,6 +98,7 @@ struct eeh_pe { ...@@ -98,6 +98,7 @@ struct eeh_pe {
#define EEH_DEV_NO_HANDLER (1 << 8) /* No error handler */ #define EEH_DEV_NO_HANDLER (1 << 8) /* No error handler */
#define EEH_DEV_SYSFS (1 << 9) /* Sysfs created */ #define EEH_DEV_SYSFS (1 << 9) /* Sysfs created */
#define EEH_DEV_REMOVED (1 << 10) /* Removed permanently */
struct eeh_dev { struct eeh_dev {
int mode; /* EEH mode */ int mode; /* EEH mode */
......
...@@ -58,6 +58,7 @@ int rtas_write_config(struct pci_dn *, int where, int size, u32 val); ...@@ -58,6 +58,7 @@ int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
void eeh_pe_state_mark(struct eeh_pe *pe, int state); void eeh_pe_state_mark(struct eeh_pe *pe, int state);
void eeh_pe_state_clear(struct eeh_pe *pe, int state); void eeh_pe_state_clear(struct eeh_pe *pe, int state);
void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode);
void eeh_sysfs_add_device(struct pci_dev *pdev); void eeh_sysfs_add_device(struct pci_dev *pdev);
void eeh_sysfs_remove_device(struct pci_dev *pdev); void eeh_sysfs_remove_device(struct pci_dev *pdev);
......
...@@ -171,6 +171,15 @@ static void eeh_enable_irq(struct pci_dev *dev) ...@@ -171,6 +171,15 @@ static void eeh_enable_irq(struct pci_dev *dev)
} }
} }
/*
 * Tell whether an EEH device should be treated as removed.
 *
 * Returns true when @edev is NULL or when its mode carries the
 * EEH_DEV_REMOVED bit; returns false otherwise.
 */
static bool eeh_dev_removed(struct eeh_dev *edev)
{
	/* A missing eeh_dev is handled the same way as a removed one */
	return !edev || (edev->mode & EEH_DEV_REMOVED) != 0;
}
/** /**
* eeh_report_error - Report pci error to each device driver * eeh_report_error - Report pci error to each device driver
* @data: eeh device * @data: eeh device
...@@ -187,10 +196,8 @@ static void *eeh_report_error(void *data, void *userdata) ...@@ -187,10 +196,8 @@ static void *eeh_report_error(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata; enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver; struct pci_driver *driver;
/* We might not have the associated PCI device, if (!dev || eeh_dev_removed(edev))
* then we should continue for next one. return NULL;
*/
if (!dev) return NULL;
dev->error_state = pci_channel_io_frozen; dev->error_state = pci_channel_io_frozen;
driver = eeh_pcid_get(dev); driver = eeh_pcid_get(dev);
...@@ -230,6 +237,9 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata) ...@@ -230,6 +237,9 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata; enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver; struct pci_driver *driver;
if (!dev || eeh_dev_removed(edev))
return NULL;
driver = eeh_pcid_get(dev); driver = eeh_pcid_get(dev);
if (!driver) return NULL; if (!driver) return NULL;
...@@ -267,7 +277,8 @@ static void *eeh_report_reset(void *data, void *userdata) ...@@ -267,7 +277,8 @@ static void *eeh_report_reset(void *data, void *userdata)
enum pci_ers_result rc, *res = userdata; enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver; struct pci_driver *driver;
if (!dev) return NULL; if (!dev || eeh_dev_removed(edev))
return NULL;
dev->error_state = pci_channel_io_normal; dev->error_state = pci_channel_io_normal;
driver = eeh_pcid_get(dev); driver = eeh_pcid_get(dev);
...@@ -307,7 +318,8 @@ static void *eeh_report_resume(void *data, void *userdata) ...@@ -307,7 +318,8 @@ static void *eeh_report_resume(void *data, void *userdata)
struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct pci_driver *driver; struct pci_driver *driver;
if (!dev) return NULL; if (!dev || eeh_dev_removed(edev))
return NULL;
dev->error_state = pci_channel_io_normal; dev->error_state = pci_channel_io_normal;
driver = eeh_pcid_get(dev); driver = eeh_pcid_get(dev);
...@@ -343,7 +355,8 @@ static void *eeh_report_failure(void *data, void *userdata) ...@@ -343,7 +355,8 @@ static void *eeh_report_failure(void *data, void *userdata)
struct pci_dev *dev = eeh_dev_to_pci_dev(edev); struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct pci_driver *driver; struct pci_driver *driver;
if (!dev) return NULL; if (!dev || eeh_dev_removed(edev))
return NULL;
dev->error_state = pci_channel_io_perm_failure; dev->error_state = pci_channel_io_perm_failure;
driver = eeh_pcid_get(dev); driver = eeh_pcid_get(dev);
...@@ -380,6 +393,16 @@ static void *eeh_rmv_device(void *data, void *userdata) ...@@ -380,6 +393,16 @@ static void *eeh_rmv_device(void *data, void *userdata)
if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
return NULL; return NULL;
/*
* We rely on count-based pcibios_release_device() to
* detach permanently offlined PEs. Unfortunately, that's
* not reliable enough. We might have the permanently
* offlined PEs attached, but we needn't take care of
* them and their child devices.
*/
if (eeh_dev_removed(edev))
return NULL;
driver = eeh_pcid_get(dev); driver = eeh_pcid_get(dev);
if (driver) { if (driver) {
eeh_pcid_put(dev); eeh_pcid_put(dev);
...@@ -694,8 +717,17 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) ...@@ -694,8 +717,17 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
/* Notify all devices that they're about to go down. */ /* Notify all devices that they're about to go down. */
eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
/* Shut down the device drivers for good. */ /* Mark the PE to be removed permanently */
pe->freeze_count = EEH_MAX_ALLOWED_FREEZES + 1;
/*
* Shut down the device drivers for good. We mark
* all removed devices correctly to avoid accessing
* their PCI config any more.
*/
if (frozen_bus) { if (frozen_bus) {
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
pci_lock_rescan_remove(); pci_lock_rescan_remove();
pcibios_remove_pci_devices(frozen_bus); pcibios_remove_pci_devices(frozen_bus);
pci_unlock_rescan_remove(); pci_unlock_rescan_remove();
......
...@@ -503,13 +503,17 @@ static void *__eeh_pe_state_mark(void *data, void *flag) ...@@ -503,13 +503,17 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
struct eeh_dev *edev, *tmp; struct eeh_dev *edev, *tmp;
struct pci_dev *pdev; struct pci_dev *pdev;
/* /* Keep the state of permanently removed PE intact */
* Mark the PE with the indicated state. Also, if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
* the associated PCI device will be put into (state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
* I/O frozen state to avoid I/O accesses from return NULL;
* the PCI device driver.
*/
pe->state |= state; pe->state |= state;
/* Offline PCI devices if applicable */
if (state != EEH_PE_ISOLATED)
return NULL;
eeh_pe_for_each_dev(pe, edev, tmp) { eeh_pe_for_each_dev(pe, edev, tmp) {
pdev = eeh_dev_to_pci_dev(edev); pdev = eeh_dev_to_pci_dev(edev);
if (pdev) if (pdev)
...@@ -532,6 +536,27 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state) ...@@ -532,6 +536,27 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state)
eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
} }
/*
 * Per-device callback handed to eeh_pe_dev_traverse() by
 * eeh_pe_dev_mode_mark().
 *
 * @data: the EEH device to update (struct eeh_dev *)
 * @flag: pointer to an int holding the mode bits to set
 *
 * ORs the requested bits into the device's mode and always returns NULL.
 */
static void *__eeh_pe_dev_mode_mark(void *data, void *flag)
{
	const int *bits = flag;
	struct eeh_dev *dev = data;

	dev->mode |= *bits;

	return NULL;
}
/**
 * eeh_pe_dev_mode_mark - Mark mode bits for all devices under the PE
 * @pe: EEH PE
 * @mode: mode bits to set on each device
 *
 * Traverse the devices of the PE with eeh_pe_dev_traverse() and OR
 * the given mode bits into each device's mode.
 */
void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode)
{
	int bits = mode;

	eeh_pe_dev_traverse(pe, __eeh_pe_dev_mode_mark, &bits);
}
/** /**
* __eeh_pe_state_clear - Clear state for the PE * __eeh_pe_state_clear - Clear state for the PE
* @data: EEH PE * @data: EEH PE
...@@ -546,8 +571,16 @@ static void *__eeh_pe_state_clear(void *data, void *flag) ...@@ -546,8 +571,16 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
struct eeh_pe *pe = (struct eeh_pe *)data; struct eeh_pe *pe = (struct eeh_pe *)data;
int state = *((int *)flag); int state = *((int *)flag);
/* Keep the state of permanently removed PE intact */
if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
(state & EEH_PE_ISOLATED))
return NULL;
pe->state &= ~state; pe->state &= ~state;
pe->check_count = 0;
/* Clear check count since last isolation */
if (state & EEH_PE_ISOLATED)
pe->check_count = 0;
return NULL; return NULL;
} }
......
...@@ -304,6 +304,9 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus, ...@@ -304,6 +304,9 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
struct pci_dev *dev = NULL; struct pci_dev *dev = NULL;
const __be32 *reg; const __be32 *reg;
int reglen, devfn; int reglen, devfn;
#ifdef CONFIG_EEH
struct eeh_dev *edev = of_node_to_eeh_dev(dn);
#endif
pr_debug(" * %s\n", dn->full_name); pr_debug(" * %s\n", dn->full_name);
if (!of_device_is_available(dn)) if (!of_device_is_available(dn))
...@@ -321,6 +324,12 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus, ...@@ -321,6 +324,12 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
return dev; return dev;
} }
/* Device removed permanently ? */
#ifdef CONFIG_EEH
if (edev && (edev->mode & EEH_DEV_REMOVED))
return NULL;
#endif
/* create a new pci_dev for this device */ /* create a new pci_dev for this device */
dev = of_create_pci_dev(dn, bus, devfn); dev = of_create_pci_dev(dn, bus, devfn);
if (!dev) if (!dev)
......
...@@ -441,11 +441,16 @@ static bool pnv_pci_cfg_check(struct pci_controller *hose, ...@@ -441,11 +441,16 @@ static bool pnv_pci_cfg_check(struct pci_controller *hose,
if (!(phb->flags & PNV_PHB_FLAG_EEH)) if (!(phb->flags & PNV_PHB_FLAG_EEH))
return true; return true;
/* PE reset ? */ /* PE reset or device removed ? */
edev = of_node_to_eeh_dev(dn); edev = of_node_to_eeh_dev(dn);
if (edev && edev->pe && if (edev) {
(edev->pe->state & EEH_PE_RESET)) if (edev->pe &&
return false; (edev->pe->state & EEH_PE_RESET))
return false;
if (edev->mode & EEH_DEV_REMOVED)
return false;
}
return true; return true;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment