Commit 15cddddb authored by Andrew Morton, committed by Linus Torvalds

[PATCH] ppc64: Add support for hotplug cpus

From: Joel Schopp <jschopp@austin.ibm.com>

Add support for hotplug cpus
parent eabf4910
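
For orientation, and not part of the patch itself: the __cpu_disable(), __cpu_die() and cpu_die() routines added below are the ppc64 hooks used by the generic CPU hotplug core in kernel/cpu.c. A rough sketch of how that core is expected to drive them is given here; the function name example_cpu_down() and the simplified flow are illustrative assumptions, only the hooks themselves come from this patch.

/*
 * Illustrative sketch only -- not part of this patch.  Roughly how the
 * generic hotplug core (kernel/cpu.c) is expected to drive the ppc64
 * hooks added below; example_cpu_down() and the simplified flow are
 * assumptions for illustration.
 */
int example_cpu_down(unsigned int cpu)
{
	int err;

	/* The generic code marks the cpu offline and arranges for
	 * __cpu_disable() to run on it, so it stops taking interrupts
	 * (xics_migrate_irqs_away() in this patch). */
	err = __cpu_disable();
	if (err)
		return err;

	/* Back in its idle loop, the dead CPU notices cpu_is_offline()
	 * and calls cpu_die(), which on pSeries ends in rtas_stop_self(). */

	/* Meanwhile the requesting CPU waits here until firmware reports
	 * the processor stopped (query-cpu-stopped-state). */
	__cpu_die(cpu);
	return 0;
}
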
@@ -248,6 +248,14 @@ source "fs/Kconfig.binfmt"
 source "drivers/pci/Kconfig"
+config HOTPLUG_CPU
+	bool "Support for hot-pluggable CPUs"
+	depends on SMP && HOTPLUG && EXPERIMENTAL
+	---help---
+	  Say Y here to be able to turn CPUs off and on.
+	  Say N if you are unsure.
 source "drivers/pcmcia/Kconfig"
 source "drivers/pci/hotplug/Kconfig"
......
@@ -26,6 +26,7 @@
 #include <linux/unistd.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
+#include <linux/cpu.h>
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -150,12 +151,18 @@ int default_idle(void)
 		}
 		schedule();
+		if (cpu_is_offline(smp_processor_id()) &&
+		    system_state == SYSTEM_RUNNING)
+			cpu_die();
 	}
 	return 0;
 }
 #ifdef CONFIG_PPC_PSERIES
+DECLARE_PER_CPU(smt_snooze_delay);
 int dedicated_idle(void)
 {
 	long oldval;
@@ -236,6 +243,9 @@ int dedicated_idle(void)
 		HMT_medium();
 		lpaca->xLpPaca.xIdle = 0;
 		schedule();
+		if (cpu_is_offline(smp_processor_id()) &&
+		    system_state == SYSTEM_RUNNING)
+			cpu_die();
 	}
 	return 0;
 }
@@ -245,6 +255,10 @@ int shared_idle(void)
 	struct paca_struct *lpaca = get_paca();
 	while (1) {
+		if (cpu_is_offline(smp_processor_id()) &&
+		    system_state == SYSTEM_RUNNING)
+			cpu_die();
 		/* Indicate to the HV that we are idle. Now would be
 		 * a good time to find other work to dispatch. */
 		lpaca->xLpPaca.xIdle = 1;
......
@@ -683,6 +683,7 @@ static struct proc_dir_entry * root_irq_dir;
 static struct proc_dir_entry * irq_dir [NR_IRQS];
 static struct proc_dir_entry * smp_affinity_entry [NR_IRQS];
+/* Protected by irq descriptor spinlock */
 #ifdef CONFIG_IRQ_ALL_CPUS
 cpumask_t irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
 #else /* CONFIG_IRQ_ALL_CPUS */
@@ -702,16 +703,17 @@ static int irq_affinity_read_proc (char *page, char **start, off_t off,
 static int irq_affinity_write_proc (struct file *file, const char *buffer,
 					unsigned long count, void *data)
 {
-	int irq = (long)data, full_count = count, err;
+	int irq = (long)data;
+	int ret;
 	cpumask_t new_value, tmp;
 	cpumask_t allcpus = CPU_MASK_ALL;
 	if (!irq_desc[irq].handler->set_affinity)
 		return -EIO;
-	err = cpumask_parse(buffer, count, new_value);
-	if (err)
-		return err;
+	ret = cpumask_parse(buffer, count, new_value);
+	if (ret != 0)
+		return ret;
 	/*
 	 * We check for CPU_MASK_ALL in xics to send irqs to all cpus.
@@ -721,19 +723,30 @@ static int irq_affinity_write_proc (struct file *file, const char *buffer,
 	 */
 	cpus_and(new_value, new_value, allcpus);
+	/*
+	 * Grab lock here so cpu_online_map can't change, and also
+	 * protect irq_affinity[].
+	 */
+	spin_lock(&irq_desc[irq].lock);
 	/*
 	 * Do not allow disabling IRQs completely - it's a too easy
 	 * way to make the system unusable accidentally :-) At least
 	 * one online CPU still has to be targeted.
 	 */
 	cpus_and(tmp, new_value, cpu_online_map);
-	if (cpus_empty(tmp))
-		return -EINVAL;
+	if (cpus_empty(tmp)) {
+		ret = -EINVAL;
+		goto out;
+	}
 	irq_affinity[irq] = new_value;
 	irq_desc[irq].handler->set_affinity(irq, new_value);
-	return full_count;
+	ret = count;
+out:
+	spin_unlock(&irq_desc[irq].lock);
+	return ret;
 }
 static int prof_cpu_mask_read_proc (char *page, char **start, off_t off,
@@ -946,5 +959,4 @@ unsigned int real_irq_to_virt_slowpath(unsigned int real_irq)
 }
 #endif
@@ -494,6 +494,25 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
 	return 0;
 }
+#ifdef CONFIG_HOTPLUG_CPU
+/* This version can't take the spinlock. */
+void rtas_stop_self(void)
+{
+	struct rtas_args *rtas_args = &(get_paca()->xRtas);
+	rtas_args->token = rtas_token("stop-self");
+	BUG_ON(rtas_args->token == RTAS_UNKNOWN_SERVICE);
+	rtas_args->nargs = 0;
+	rtas_args->nret = 1;
+	rtas_args->rets = &(rtas_args->args[0]);
+	printk("%u %u Ready to die...\n",
+	       smp_processor_id(), hard_smp_processor_id());
+	enter_rtas((void *)__pa(rtas_args));
+	panic("Alas, I survived.\n");
+}
+#endif /* CONFIG_HOTPLUG_CPU */
 EXPORT_SYMBOL(rtas_firmware_flash_list);
 EXPORT_SYMBOL(rtas_token);
......
@@ -25,6 +25,7 @@
 #include <linux/version.h>
 #include <linux/tty.h>
 #include <linux/root_dev.h>
+#include <linux/cpu.h>
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/processor.h>
@@ -338,8 +339,13 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		return 0;
 	}
-	if (!cpu_online(cpu_id))
+	/* We only show online cpus: disable preempt (overzealous, I
+	 * knew) to prevent cpu going down. */
+	preempt_disable();
+	if (!cpu_online(cpu_id)) {
+		preempt_enable();
 		return 0;
+	}
 #ifdef CONFIG_SMP
 	pvr = per_cpu(pvr, cpu_id);
@@ -372,7 +378,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		       ppc_proc_freq % 1000000);
 	seq_printf(m, "revision\t: %hd.%hd\n\n", maj, min);
+	preempt_enable();
 	return 0;
 }
......
@@ -230,10 +230,237 @@ static void __devinit smp_openpic_setup_cpu(int cpu)
 	do_openpic_setup_cpu();
 }
+#ifdef CONFIG_HOTPLUG_CPU
+/* Get state of physical CPU.
+ * Return codes:
+ *	0	- The processor is in the RTAS stopped state
+ *	1	- stop-self is in progress
+ *	2	- The processor is not in the RTAS stopped state
+ *	-1	- Hardware Error
+ *	-2	- Hardware Busy, Try again later.
+ */
+static int query_cpu_stopped(unsigned int pcpu)
+{
+	long cpu_status;
+	int status, qcss_tok;
+	qcss_tok = rtas_token("query-cpu-stopped-state");
+	BUG_ON(qcss_tok == RTAS_UNKNOWN_SERVICE);
+	status = rtas_call(qcss_tok, 1, 2, &cpu_status, pcpu);
+	if (status != 0) {
+		printk(KERN_ERR
+		       "RTAS query-cpu-stopped-state failed: %i\n", status);
+		return status;
+	}
+	return cpu_status;
+}
+int __cpu_disable(void)
+{
+	/* FIXME: go put this in a header somewhere */
+	extern void xics_migrate_irqs_away(void);
+	systemcfg->processorCount--;
+	/*fix boot_cpuid here*/
+	if (smp_processor_id() == boot_cpuid)
+		boot_cpuid = any_online_cpu(cpu_online_map);
+	/* FIXME: abstract this to not be platform specific later on */
+	xics_migrate_irqs_away();
+	return 0;
+}
+void __cpu_die(unsigned int cpu)
+{
+	int tries;
+	int cpu_status;
+	unsigned int pcpu = get_hard_smp_processor_id(cpu);
+	for (tries = 0; tries < 5; tries++) {
+		cpu_status = query_cpu_stopped(pcpu);
+		if (cpu_status == 0)
+			break;
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(HZ);
+	}
+	if (cpu_status != 0) {
+		printk("Querying DEAD? cpu %i (%i) shows %i\n",
+		       cpu, pcpu, cpu_status);
+	}
+	/* Isolation and deallocation are definatly done by
+	 * drslot_chrp_cpu. If they were not they would be
+	 * done here. Change isolate state to Isolate and
+	 * change allocation-state to Unusable.
+	 */
+	paca[cpu].xProcStart = 0;
+	/* So we can recognize if it fails to come up next time. */
+	cpu_callin_map[cpu] = 0;
+}
+/* Kill this cpu */
+void cpu_die(void)
+{
+	local_irq_disable();
+	rtas_stop_self();
+	/* Should never get here... */
+	BUG();
+	for(;;);
+}
+/* Search all cpu device nodes for an offline logical cpu. If a
+ * device node has a "ibm,my-drc-index" property (meaning this is an
+ * LPAR), paranoid-check whether we own the cpu. For each "thread"
+ * of a cpu, if it is offline and has the same hw index as before,
+ * grab that in preference.
+ */
+static unsigned int find_physical_cpu_to_start(unsigned int old_hwindex)
+{
+	struct device_node *np = NULL;
+	unsigned int best = -1U;
+	while ((np = of_find_node_by_type(np, "cpu"))) {
+		int nr_threads, len;
+		u32 *index = (u32 *)get_property(np, "ibm,my-drc-index", NULL);
+		u32 *tid = (u32 *)
+			get_property(np, "ibm,ppc-interrupt-server#s", &len);
+		if (!tid)
+			tid = (u32 *)get_property(np, "reg", &len);
+		if (!tid)
+			continue;
+		/* If there is a drc-index, make sure that we own
+		 * the cpu.
+		 */
+		if (index) {
+			int state;
+			int rc = rtas_get_sensor(9003, *index, &state);
+			if (rc != 0 || state != 1)
+				continue;
+		}
+		nr_threads = len / sizeof(u32);
+		while (nr_threads--) {
+			if (0 == query_cpu_stopped(tid[nr_threads])) {
+				best = tid[nr_threads];
+				if (best == old_hwindex)
+					goto out;
+			}
+		}
+	}
+out:
+	of_node_put(np);
+	return best;
+}
+/**
+ * smp_startup_cpu() - start the given cpu
+ *
+ * At boot time, there is nothing to do. At run-time, call RTAS with
+ * the appropriate start location, if the cpu is in the RTAS stopped
+ * state.
+ *
+ * Returns:
+ *	0	- failure
+ *	1	- success
+ */
+static inline int __devinit smp_startup_cpu(unsigned int lcpu)
+{
+	int status;
+	extern void (*pseries_secondary_smp_init)(unsigned int cpu);
+	unsigned long start_here = __pa(pseries_secondary_smp_init);
+	unsigned int pcpu;
+	/* At boot time the cpus are already spinning in hold
+	 * loops, so nothing to do. */
+	if (system_state == SYSTEM_BOOTING)
+		return 1;
+	pcpu = find_physical_cpu_to_start(get_hard_smp_processor_id(lcpu));
+	if (pcpu == -1U) {
+		printk(KERN_INFO "No more cpus available, failing\n");
+		return 0;
+	}
+	/* Fixup atomic count: it exited inside IRQ handler. */
+	((struct task_struct *)paca[lcpu].xCurrent)->thread_info->preempt_count
+		= 0;
+	/* Fixup SLB round-robin so next segment (kernel) goes in segment 0 */
+	paca[lcpu].xStab_data.next_round_robin = 0;
+	/* At boot this is done in prom.c. */
+	paca[lcpu].xHwProcNum = pcpu;
+	status = rtas_call(rtas_token("start-cpu"), 3, 1, NULL,
+			   pcpu, start_here, lcpu);
+	if (status != 0) {
+		printk(KERN_ERR "start-cpu failed: %i\n", status);
+		return 0;
+	}
+	return 1;
+}
+static inline void look_for_more_cpus(void)
+{
+	int num_addr_cell, num_size_cell, len, i, maxcpus;
+	struct device_node *np;
+	unsigned int *ireg;
+	/* Find the property which will tell us about how many CPUs
+	 * we're allowed to have. */
+	if ((np = find_path_device("/rtas")) == NULL) {
+		printk(KERN_ERR "Could not find /rtas in device tree!");
+		return;
+	}
+	num_addr_cell = prom_n_addr_cells(np);
+	num_size_cell = prom_n_size_cells(np);
+	ireg = (unsigned int *)get_property(np, "ibm,lrdr-capacity", &len);
+	if (ireg == NULL) {
+		/* FIXME: make sure not marked as lrdr_capable() */
+		return;
+	}
+	maxcpus = ireg[num_addr_cell + num_size_cell];
+	/* DRENG need to account for threads here too */
+	if (maxcpus > NR_CPUS) {
+		printk(KERN_WARNING
+		       "Partition configured for %d cpus, "
+		       "operating system maximum is %d.\n", maxcpus, NR_CPUS);
+		maxcpus = NR_CPUS;
+	} else
+		printk(KERN_INFO "Partition configured for %d cpus.\n",
+		       maxcpus);
+	/* Make those cpus (which might appear later) possible too. */
+	for (i = 0; i < maxcpus; i++)
+		cpu_set(i, cpu_possible_map);
+}
+#else /* ... CONFIG_HOTPLUG_CPU */
+static inline int __devinit smp_startup_cpu(unsigned int lcpu)
+{
+	return 1;
+}
+static inline void look_for_more_cpus(void)
+{
+}
+#endif /* CONFIG_HOTPLUG_CPU */
 static void smp_pSeries_kick_cpu(int nr)
 {
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
+	if (!smp_startup_cpu(nr))
+		return;
 	/* The processor is currently spinning, waiting
 	 * for the xProcStart field to become non-zero
 	 * After we set xProcStart, the processor will
@@ -241,7 +468,7 @@ static void smp_pSeries_kick_cpu(int nr)
 	 */
 	paca[nr].xProcStart = 1;
 }
-#endif
+#endif /* CONFIG_PPC_PSERIES */
 static void __init smp_space_timers(unsigned int max_cpus)
 {
@@ -462,12 +689,9 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
 			int wait)
 {
 	struct call_data_struct data;
-	int ret = -1, cpus = num_online_cpus()-1;
+	int ret = -1, cpus;
 	unsigned long timeout;
-	if (!cpus)
-		return 0;
 	data.func = func;
 	data.info = info;
 	atomic_set(&data.started, 0);
@@ -476,6 +700,14 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
 	atomic_set(&data.finished, 0);
 	spin_lock(&call_lock);
+	/* Must grab online cpu count with preempt disabled, otherwise
+	 * it can change. */
+	cpus = num_online_cpus() - 1;
+	if (!cpus) {
+		ret = 0;
+		goto out;
+	}
 	call_data = &data;
 	wmb();
 	/* Send a message to all other CPUs and wait for them to respond */
@@ -565,8 +797,31 @@ static void __devinit smp_store_cpu_info(int id)
 	per_cpu(pvr, id) = _get_PVR();
 }
+static void __init smp_create_idle(unsigned int cpu)
+{
+	struct pt_regs regs;
+	struct task_struct *p;
+	/* create a process for the processor */
+	/* only regs.msr is actually used, and 0 is OK for it */
+	memset(&regs, 0, sizeof(struct pt_regs));
+	p = copy_process(CLONE_VM | CLONE_IDLETASK,
+			 0, &regs, 0, NULL, NULL);
+	if (IS_ERR(p))
+		panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p));
+	wake_up_forked_process(p);
+	init_idle(p, cpu);
+	unhash_process(p);
+	paca[cpu].xCurrent = (u64)p;
+	current_set[cpu] = p->thread_info;
+}
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
+	unsigned int cpu;
 	/*
 	 * setup_cpu may need to be called on the boot cpu. We havent
 	 * spun any cpus up but lets be paranoid.
@@ -593,6 +848,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	 * number of msecs off until someone does a settimeofday()
 	 */
 	do_gtod.tb_orig_stamp = tb_last_stamp;
+	look_for_more_cpus();
 #endif
 	max_cpus = smp_ops->probe();
@@ -601,20 +858,31 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	__save_cpu_setup();
 	smp_space_timers(max_cpus);
+	for_each_cpu(cpu)
+		if (cpu != boot_cpuid)
+			smp_create_idle(cpu);
 }
 void __devinit smp_prepare_boot_cpu(void)
 {
-	cpu_set(smp_processor_id(), cpu_online_map);
-	/* FIXME: what about cpu_possible()? */
+	BUG_ON(smp_processor_id() != boot_cpuid);
+	/* cpu_possible is set up in prom.c */
+	cpu_set(boot_cpuid, cpu_online_map);
+	paca[boot_cpuid].xCurrent = (u64)current;
+	current_set[boot_cpuid] = current->thread_info;
 }
 int __devinit __cpu_up(unsigned int cpu)
 {
-	struct pt_regs regs;
-	struct task_struct *p;
 	int c;
+	/* At boot, don't bother with non-present cpus -JSCHOPP */
+	if (system_state == SYSTEM_BOOTING && !cpu_present_at_boot(cpu))
+		return -ENOENT;
 	paca[cpu].prof_counter = 1;
 	paca[cpu].prof_multiplier = 1;
 	paca[cpu].default_decr = tb_ticks_per_jiffy / decr_overclock;
@@ -632,19 +900,9 @@ int __devinit __cpu_up(unsigned int cpu)
 		paca[cpu].xStab_data.real = virt_to_abs(tmp);
 	}
-	/* create a process for the processor */
-	/* only regs.msr is actually used, and 0 is OK for it */
-	memset(&regs, 0, sizeof(struct pt_regs));
-	p = copy_process(CLONE_VM|CLONE_IDLETASK, 0, &regs, 0, NULL, NULL);
-	if (IS_ERR(p))
-		panic("failed fork for CPU %u: %li", cpu, PTR_ERR(p));
-	wake_up_forked_process(p);
-	init_idle(p, cpu);
-	unhash_process(p);
-	paca[cpu].xCurrent = (u64)p;
-	current_set[cpu] = p->thread_info;
+	/* The information for processor bringup must be written out
+	 * to main store before we release the processor. */
+	mb();
 	/* The information for processor bringup must
 	 * be written out to main store before we release
......
@@ -19,6 +19,7 @@
 #include <linux/init.h>
 #include <linux/gfp.h>
 #include <linux/radix-tree.h>
+#include <linux/cpu.h>
 #include <asm/prom.h>
 #include <asm/io.h>
 #include <asm/pgtable.h>
@@ -372,6 +373,9 @@ irqreturn_t xics_ipi_action(int irq, void *dev_id, struct pt_regs *regs)
 	int cpu = smp_processor_id();
 	ops->qirr_info(cpu, 0xff);
+	WARN_ON(cpu_is_offline(cpu));
 	while (xics_ipi_message[cpu].value) {
 		if (test_and_clear_bit(PPC_MSG_CALL_FUNCTION,
 				       &xics_ipi_message[cpu].value)) {
@@ -514,6 +518,9 @@ void xics_init_IRQ(void)
 	if (systemcfg->platform == PLATFORM_PSERIES) {
 #ifdef CONFIG_SMP
 		for_each_cpu(i) {
+			/* FIXME: Do this dynamically! --RR */
+			if (!cpu_present_at_boot(i))
+				continue;
 			xics_per_cpu[i] = __ioremap((ulong)inodes[get_hard_smp_processor_id(i)].addr,
 						    (ulong)inodes[get_hard_smp_processor_id(i)].size,
 						    _PAGE_NO_CACHE);
@@ -575,9 +582,7 @@ void xics_request_IPIs(void)
 static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
 {
-	irq_desc_t *desc = irq_desc + virq;
 	unsigned int irq;
-	unsigned long flags;
 	long status;
 	unsigned long xics_status[2];
 	unsigned long newmask;
@@ -589,14 +594,12 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
 	if (irq == XICS_IPI)
 		return;
-	spin_lock_irqsave(&desc->lock, flags);
 	status = rtas_call(ibm_get_xive, 1, 3, (void *)&xics_status, irq);
 	if (status) {
 		printk(KERN_ERR "xics_set_affinity: irq=%d ibm,get-xive "
 		       "returns %ld\n", irq, status);
-		goto out;
+		return;
 	}
 	/* For the moment only implement delivery to all cpus or one cpu */
@@ -605,7 +608,7 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
 	} else {
 		cpus_and(tmp, cpu_online_map, cpumask);
 		if (cpus_empty(tmp))
-			goto out;
+			return;
 		newmask = get_hard_smp_processor_id(first_cpu(cpumask));
 	}
@@ -615,9 +618,86 @@ static void xics_set_affinity(unsigned int virq, cpumask_t cpumask)
 	if (status) {
 		printk(KERN_ERR "xics_set_affinity irq=%d ibm,set-xive "
 		       "returns %ld\n", irq, status);
-		goto out;
+		return;
 	}
-out:
-	spin_unlock_irqrestore(&desc->lock, flags);
 }
+#ifdef CONFIG_HOTPLUG_CPU
+/* Interrupts are disabled. */
+void xics_migrate_irqs_away(void)
+{
+	int set_indicator = rtas_token("set-indicator");
+	const unsigned long giqs = 9005UL; /* Global Interrupt Queue Server */
+	unsigned long status = 0;
+	unsigned int irq, cpu = smp_processor_id();
+	unsigned long xics_status[2];
+	unsigned long flags;
+	BUG_ON(set_indicator == RTAS_UNKNOWN_SERVICE);
+	/* Reject any interrupt that was queued to us... */
+	ops->cppr_info(cpu, 0);
+	iosync();
+	/* Refuse any new interrupts... */
+	rtas_call(set_indicator, 3, 1, &status, giqs,
+		  hard_smp_processor_id(), 0UL);
+	WARN_ON(status != 0);
+	/* Allow IPIs again... */
+	ops->cppr_info(cpu, DEFAULT_PRIORITY);
+	iosync();
+	printk(KERN_WARNING "HOTPLUG: Migrating IRQs away\n");
+	for_each_irq(irq) {
+		irq_desc_t *desc = get_irq_desc(irq);
+		/* We need to get IPIs still. */
+		if (irq_offset_down(irq) == XICS_IPI)
+			continue;
+		/* We only need to migrate enabled IRQS */
+		if (desc == NULL || desc->handler == NULL
+		    || desc->action == NULL
+		    || desc->handler->set_affinity == NULL)
+			continue;
+		spin_lock_irqsave(&desc->lock, flags);
+		status = rtas_call(ibm_get_xive, 1, 3, (void *)&xics_status,
+				   irq);
+		if (status) {
+			printk(KERN_ERR "migrate_irqs_away: irq=%d "
+			       "ibm,get-xive returns %ld\n",
+			       irq, status);
+			goto unlock;
+		}
+		/*
+		 * We only support delivery to all cpus or to one cpu.
+		 * The irq has to be migrated only in the single cpu
+		 * case.
+		 */
+		if (xics_status[0] != get_hard_smp_processor_id(cpu))
+			goto unlock;
+		printk(KERN_WARNING "IRQ %d affinity broken off cpu %u\n",
+		       irq, cpu);
+		/* Reset affinity to all cpus */
+		xics_status[0] = default_distrib_server;
+		status = rtas_call(ibm_set_xive, 3, 1, NULL,
+				   irq, xics_status[0], xics_status[1]);
+		if (status)
+			printk(KERN_ERR "migrate_irqs_away irq=%d "
+			       "ibm,set-xive returns %ld\n",
+			       irq, status);
+unlock:
+		spin_unlock_irqrestore(&desc->lock, flags);
+	}
+}
+#endif
@@ -219,6 +219,8 @@ extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
 extern spinlock_t rtas_data_buf_lock;
 extern char rtas_data_buf[RTAS_DATA_BUF_SIZE];
+extern void rtas_stop_self(void);
 /* RMO buffer reserved for user-space RTAS use */
 extern unsigned long rtas_rmo_buf;
......
@@ -70,6 +70,9 @@ extern cpumask_t cpu_available_map;
 void smp_init_iSeries(void);
 void smp_init_pSeries(void);
+extern int __cpu_disable(void);
+extern void __cpu_die(unsigned int cpu);
+extern void cpu_die(void) __attribute__((noreturn));
 #endif /* !(CONFIG_SMP) */
 #define get_hard_smp_processor_id(CPU) (paca[(CPU)].xHwProcNum)
......