powerpc/powernv: Improve kexec reliability

We add a machine_shutdown hook that frees the OPAL interrupts
(so they get masked at the source and don't fire while kexec'ing)
and which triggers an IODA reset on all the PCIe host bridges
which will have the effect of blocking all DMAs and subsequent
PCIs interrupts.
Signed-off-by: default avatarBenjamin Herrenschmidt <benh@kernel.crashing.org>
parent 1de1455f
......@@ -563,6 +563,8 @@ extern void opal_nvram_init(void);
extern int opal_machine_check(struct pt_regs *regs);
extern void opal_shutdown(void);
#endif /* __ASSEMBLY__ */
#endif /* __OPAL_H */
......@@ -15,6 +15,7 @@
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <asm/opal.h>
#include <asm/firmware.h>
......@@ -28,6 +29,8 @@ struct opal {
static struct device_node *opal_node;
static DEFINE_SPINLOCK(opal_write_lock);
extern u64 opal_mc_secondary_handler[];
static unsigned int *opal_irqs;
static unsigned int opal_irq_count;
int __init early_init_dt_scan_opal(unsigned long node,
const char *uname, int depth, void *data)
......@@ -323,6 +326,8 @@ static int __init opal_init(void)
irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
pr_debug("opal: Found %d interrupts reserved for OPAL\n",
irqs ? (irqlen / 4) : 0);
opal_irq_count = irqlen / 4;
opal_irqs = kzalloc(opal_irq_count * sizeof(unsigned int), GFP_KERNEL);
for (i = 0; irqs && i < (irqlen / 4); i++, irqs++) {
unsigned int hwirq = be32_to_cpup(irqs);
unsigned int irq = irq_create_mapping(NULL, hwirq);
......@@ -334,7 +339,19 @@ static int __init opal_init(void)
if (rc)
pr_warning("opal: Error %d requesting irq %d"
" (0x%x)\n", rc, irq, hwirq);
opal_irqs[i] = irq;
}
return 0;
}
subsys_initcall(opal_init);
void opal_shutdown(void)
{
unsigned int i;
for (i = 0; i < opal_irq_count; i++) {
if (opal_irqs[i])
free_irq(opal_irqs[i], 0);
opal_irqs[i] = 0;
}
}
......@@ -1048,6 +1048,12 @@ static u32 pnv_ioda_bdfn_to_pe(struct pnv_phb *phb, struct pci_bus *bus,
return phb->ioda.pe_rmap[(bus->number << 8) | devfn];
}
static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
{
opal_pci_reset(phb->opal_id, OPAL_PCI_IODA_TABLE_RESET,
OPAL_ASSERT_RESET);
}
void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
{
struct pci_controller *hose;
......@@ -1178,6 +1184,9 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
/* Setup TCEs */
phb->dma_dev_setup = pnv_pci_ioda_dma_dev_setup;
/* Setup shutdown function for kexec */
phb->shutdown = pnv_pci_ioda_shutdown;
/* Setup MSI support */
pnv_pci_init_ioda_msis(phb);
......
......@@ -450,6 +450,18 @@ static void pnv_pci_dma_dev_setup(struct pci_dev *pdev)
pnv_pci_dma_fallback_setup(hose, pdev);
}
void pnv_pci_shutdown(void)
{
struct pci_controller *hose;
list_for_each_entry(hose, &hose_list, list_node) {
struct pnv_phb *phb = hose->private_data;
if (phb && phb->shutdown)
phb->shutdown(phb);
}
}
/* Fixup wrong class code in p7ioc and p8 root complex */
static void pnv_p7ioc_rc_quirk(struct pci_dev *dev)
{
......
......@@ -86,6 +86,7 @@ struct pnv_phb {
void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
void (*fixup_phb)(struct pci_controller *hose);
u32 (*bdfn_to_pe)(struct pnv_phb *phb, struct pci_bus *bus, u32 devfn);
void (*shutdown)(struct pnv_phb *phb);
union {
struct {
......@@ -158,4 +159,5 @@ extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
u64 *startp, u64 *endp);
#endif /* __POWERNV_PCI_H */
......@@ -9,8 +9,10 @@ static inline void pnv_smp_init(void) { }
#ifdef CONFIG_PCI
extern void pnv_pci_init(void);
extern void pnv_pci_shutdown(void);
#else
static inline void pnv_pci_init(void) { }
static inline void pnv_pci_shutdown(void) { }
#endif
#endif /* _POWERNV_H */
......@@ -126,6 +126,17 @@ static void pnv_progress(char *s, unsigned short hex)
{
}
static void pnv_shutdown(void)
{
/* Let the PCI code clear up IODA tables */
pnv_pci_shutdown();
/* And unregister all OPAL interrupts so they don't fire
* up while we kexec
*/
opal_shutdown();
}
#ifdef CONFIG_KEXEC
static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
{
......@@ -187,6 +198,7 @@ define_machine(powernv) {
.init_IRQ = pnv_init_IRQ,
.show_cpuinfo = pnv_show_cpuinfo,
.progress = pnv_progress,
.machine_shutdown = pnv_shutdown,
.power_save = power7_idle,
.calibrate_decr = generic_calibrate_decr,
#ifdef CONFIG_KEXEC
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment