Commit 73a40064, authored by Yi Li, committed by Mike Frysinger

Blackfin: SMP: rewrite IPI handling to avoid memory allocation

Currently, sending an interprocessor interrupt (IPI) requires building up
a message dynamically, which means memory allocation.  But oftentimes we
will want to send an IPI in low-level contexts where allocation is not
possible, which may lead to a panic().  So create a per-cpu static array
for the message queue and use that instead.

Further, while we have two supplemental interrupts, we are currently only
using one of them.  So dedicate one (IRQ_SUPPLE_0) to the most common IPI
message of all -- smp_send_reschedule() -- and use the other (IRQ_SUPPLE_1)
for the queued messages.  This avoids ugly contention for locks, which in
turn would require an IPI message ...

In general, this improves SMP performance, and in some cases allows the
SMP port to work in places it wouldn't before, such as the PREEMPT_RT
state where the slab is protected by a per-cpu spin lock.  If the slab
kmalloc/kfree were to put the task to sleep, and that task was actually
the IPI handler, then the system would fall down yet again.

After running various stress tests on the system, the static limit
of 5 messages seems to work.  On the off chance even this overflows, we
simply panic(), and we can review that scenario to see if the limit needs
to be increased a bit more.

Signed-off-by: Yi Li <yi.li@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
parent 2c1657c2
...@@ -19,13 +19,13 @@ int platform_boot_secondary(unsigned int cpu, struct task_struct *idle); ...@@ -19,13 +19,13 @@ int platform_boot_secondary(unsigned int cpu, struct task_struct *idle);
void platform_secondary_init(unsigned int cpu); void platform_secondary_init(unsigned int cpu);
void platform_request_ipi(/*irq_handler_t*/ void *handler); void platform_request_ipi(int irq, /*irq_handler_t*/ void *handler);
void platform_send_ipi(cpumask_t callmap); void platform_send_ipi(cpumask_t callmap, int irq);
void platform_send_ipi_cpu(unsigned int cpu); void platform_send_ipi_cpu(unsigned int cpu, int irq);
void platform_clear_ipi(unsigned int cpu); void platform_clear_ipi(unsigned int cpu, int irq);
void bfin_local_timer_setup(void); void bfin_local_timer_setup(void);
......
...@@ -111,41 +111,46 @@ int __cpuinit platform_boot_secondary(unsigned int cpu, struct task_struct *idle ...@@ -111,41 +111,46 @@ int __cpuinit platform_boot_secondary(unsigned int cpu, struct task_struct *idle
panic("CPU%u: processor failed to boot\n", cpu); panic("CPU%u: processor failed to boot\n", cpu);
} }
void __init platform_request_ipi(void *handler) static const char supple0[] = "IRQ_SUPPLE_0";
static const char supple1[] = "IRQ_SUPPLE_1";
void __init platform_request_ipi(int irq, void *handler)
{ {
int ret; int ret;
const char *name = (irq == IRQ_SUPPLE_0) ? supple0 : supple1;
ret = request_irq(IRQ_SUPPLE_0, handler, IRQF_DISABLED, ret = request_irq(irq, handler, IRQF_DISABLED | IRQF_PERCPU, name, handler);
"Supplemental Interrupt0", handler);
if (ret) if (ret)
panic("Cannot request supplemental interrupt 0 for IPI service"); panic("Cannot request %s for IPI service", name);
} }
void platform_send_ipi(cpumask_t callmap) void platform_send_ipi(cpumask_t callmap, int irq)
{ {
unsigned int cpu; unsigned int cpu;
int offset = (irq == IRQ_SUPPLE_0) ? 6 : 8;
for_each_cpu_mask(cpu, callmap) { for_each_cpu_mask(cpu, callmap) {
BUG_ON(cpu >= 2); BUG_ON(cpu >= 2);
SSYNC(); SSYNC();
bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | (1 << (6 + cpu))); bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | (1 << (offset + cpu)));
SSYNC(); SSYNC();
} }
} }
void platform_send_ipi_cpu(unsigned int cpu) void platform_send_ipi_cpu(unsigned int cpu, int irq)
{ {
int offset = (irq == IRQ_SUPPLE_0) ? 6 : 8;
BUG_ON(cpu >= 2); BUG_ON(cpu >= 2);
SSYNC(); SSYNC();
bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | (1 << (6 + cpu))); bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | (1 << (offset + cpu)));
SSYNC(); SSYNC();
} }
void platform_clear_ipi(unsigned int cpu) void platform_clear_ipi(unsigned int cpu, int irq)
{ {
int offset = (irq == IRQ_SUPPLE_0) ? 10 : 12;
BUG_ON(cpu >= 2); BUG_ON(cpu >= 2);
SSYNC(); SSYNC();
bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | (1 << (10 + cpu))); bfin_write_SICB_SYSCR(bfin_read_SICB_SYSCR() | (1 << (offset + cpu)));
SSYNC(); SSYNC();
} }
......
...@@ -60,8 +60,7 @@ struct smp_call_struct { ...@@ -60,8 +60,7 @@ struct smp_call_struct {
void (*func)(void *info); void (*func)(void *info);
void *info; void *info;
int wait; int wait;
cpumask_t pending; cpumask_t *waitmask;
cpumask_t waitmask;
}; };
static struct blackfin_flush_data smp_flush_data; static struct blackfin_flush_data smp_flush_data;
...@@ -69,15 +68,19 @@ static struct blackfin_flush_data smp_flush_data; ...@@ -69,15 +68,19 @@ static struct blackfin_flush_data smp_flush_data;
static DEFINE_SPINLOCK(stop_lock); static DEFINE_SPINLOCK(stop_lock);
struct ipi_message { struct ipi_message {
struct list_head list;
unsigned long type; unsigned long type;
struct smp_call_struct call_struct; struct smp_call_struct call_struct;
}; };
/* A magic number - stress test shows this is safe for common cases */
#define BFIN_IPI_MSGQ_LEN 5
/* Simple FIFO buffer, overflow leads to panic */
struct ipi_message_queue { struct ipi_message_queue {
struct list_head head;
spinlock_t lock; spinlock_t lock;
unsigned long count; unsigned long count;
unsigned long head; /* head of the queue */
struct ipi_message ipi_message[BFIN_IPI_MSGQ_LEN];
}; };
static DEFINE_PER_CPU(struct ipi_message_queue, ipi_msg_queue); static DEFINE_PER_CPU(struct ipi_message_queue, ipi_msg_queue);
...@@ -116,7 +119,6 @@ static void ipi_call_function(unsigned int cpu, struct ipi_message *msg) ...@@ -116,7 +119,6 @@ static void ipi_call_function(unsigned int cpu, struct ipi_message *msg)
func = msg->call_struct.func; func = msg->call_struct.func;
info = msg->call_struct.info; info = msg->call_struct.info;
wait = msg->call_struct.wait; wait = msg->call_struct.wait;
cpu_clear(cpu, msg->call_struct.pending);
func(info); func(info);
if (wait) { if (wait) {
#ifdef __ARCH_SYNC_CORE_DCACHE #ifdef __ARCH_SYNC_CORE_DCACHE
...@@ -127,51 +129,57 @@ static void ipi_call_function(unsigned int cpu, struct ipi_message *msg) ...@@ -127,51 +129,57 @@ static void ipi_call_function(unsigned int cpu, struct ipi_message *msg)
*/ */
resync_core_dcache(); resync_core_dcache();
#endif #endif
cpu_clear(cpu, msg->call_struct.waitmask); cpu_clear(cpu, *msg->call_struct.waitmask);
} else }
kfree(msg);
} }
static irqreturn_t ipi_handler(int irq, void *dev_instance) /* Use IRQ_SUPPLE_0 to request reschedule.
* When returning from interrupt to user space,
* there is chance to reschedule */
static irqreturn_t ipi_handler_int0(int irq, void *dev_instance)
{
unsigned int cpu = smp_processor_id();
platform_clear_ipi(cpu, IRQ_SUPPLE_0);
return IRQ_HANDLED;
}
static irqreturn_t ipi_handler_int1(int irq, void *dev_instance)
{ {
struct ipi_message *msg; struct ipi_message *msg;
struct ipi_message_queue *msg_queue; struct ipi_message_queue *msg_queue;
unsigned int cpu = smp_processor_id(); unsigned int cpu = smp_processor_id();
unsigned long flags;
platform_clear_ipi(cpu); platform_clear_ipi(cpu, IRQ_SUPPLE_1);
msg_queue = &__get_cpu_var(ipi_msg_queue); msg_queue = &__get_cpu_var(ipi_msg_queue);
msg_queue->count++;
spin_lock(&msg_queue->lock); spin_lock_irqsave(&msg_queue->lock, flags);
while (!list_empty(&msg_queue->head)) {
msg = list_entry(msg_queue->head.next, typeof(*msg), list); while (msg_queue->count) {
list_del(&msg->list); msg = &msg_queue->ipi_message[msg_queue->head];
switch (msg->type) { switch (msg->type) {
case BFIN_IPI_RESCHEDULE:
/* That's the easiest one; leave it to
* return_from_int. */
kfree(msg);
break;
case BFIN_IPI_CALL_FUNC: case BFIN_IPI_CALL_FUNC:
spin_unlock(&msg_queue->lock); spin_unlock_irqrestore(&msg_queue->lock, flags);
ipi_call_function(cpu, msg); ipi_call_function(cpu, msg);
spin_lock(&msg_queue->lock); spin_lock_irqsave(&msg_queue->lock, flags);
break; break;
case BFIN_IPI_CPU_STOP: case BFIN_IPI_CPU_STOP:
spin_unlock(&msg_queue->lock); spin_unlock_irqrestore(&msg_queue->lock, flags);
ipi_cpu_stop(cpu); ipi_cpu_stop(cpu);
spin_lock(&msg_queue->lock); spin_lock_irqsave(&msg_queue->lock, flags);
kfree(msg);
break; break;
default: default:
printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%lx\n", printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%lx\n",
cpu, msg->type); cpu, msg->type);
kfree(msg);
break; break;
} }
msg_queue->head++;
msg_queue->head %= BFIN_IPI_MSGQ_LEN;
msg_queue->count--;
} }
spin_unlock(&msg_queue->lock); spin_unlock_irqrestore(&msg_queue->lock, flags);
return IRQ_HANDLED; return IRQ_HANDLED;
} }
...@@ -181,48 +189,47 @@ static void ipi_queue_init(void) ...@@ -181,48 +189,47 @@ static void ipi_queue_init(void)
struct ipi_message_queue *msg_queue; struct ipi_message_queue *msg_queue;
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
msg_queue = &per_cpu(ipi_msg_queue, cpu); msg_queue = &per_cpu(ipi_msg_queue, cpu);
INIT_LIST_HEAD(&msg_queue->head);
spin_lock_init(&msg_queue->lock); spin_lock_init(&msg_queue->lock);
msg_queue->count = 0; msg_queue->count = 0;
msg_queue->head = 0;
} }
} }
int smp_call_function(void (*func)(void *info), void *info, int wait) static inline void smp_send_message(cpumask_t callmap, unsigned long type,
void (*func) (void *info), void *info, int wait)
{ {
unsigned int cpu; unsigned int cpu;
cpumask_t callmap;
unsigned long flags;
struct ipi_message_queue *msg_queue; struct ipi_message_queue *msg_queue;
struct ipi_message *msg; struct ipi_message *msg;
unsigned long flags, next_msg;
callmap = cpu_online_map; cpumask_t waitmask = callmap; /* waitmask is shared by all cpus */
cpu_clear(smp_processor_id(), callmap);
if (cpus_empty(callmap))
return 0;
msg = kmalloc(sizeof(*msg), GFP_ATOMIC);
if (!msg)
return -ENOMEM;
INIT_LIST_HEAD(&msg->list);
msg->call_struct.func = func;
msg->call_struct.info = info;
msg->call_struct.wait = wait;
msg->call_struct.pending = callmap;
msg->call_struct.waitmask = callmap;
msg->type = BFIN_IPI_CALL_FUNC;
for_each_cpu_mask(cpu, callmap) { for_each_cpu_mask(cpu, callmap) {
msg_queue = &per_cpu(ipi_msg_queue, cpu); msg_queue = &per_cpu(ipi_msg_queue, cpu);
spin_lock_irqsave(&msg_queue->lock, flags); spin_lock_irqsave(&msg_queue->lock, flags);
list_add_tail(&msg->list, &msg_queue->head); if (msg_queue->count < BFIN_IPI_MSGQ_LEN) {
next_msg = (msg_queue->head + msg_queue->count)
% BFIN_IPI_MSGQ_LEN;
msg = &msg_queue->ipi_message[next_msg];
msg->type = type;
if (type == BFIN_IPI_CALL_FUNC) {
msg->call_struct.func = func;
msg->call_struct.info = info;
msg->call_struct.wait = wait;
msg->call_struct.waitmask = &waitmask;
}
msg_queue->count++;
} else
panic("IPI message queue overflow\n");
spin_unlock_irqrestore(&msg_queue->lock, flags); spin_unlock_irqrestore(&msg_queue->lock, flags);
platform_send_ipi_cpu(cpu); platform_send_ipi_cpu(cpu, IRQ_SUPPLE_1);
} }
if (wait) { if (wait) {
while (!cpus_empty(msg->call_struct.waitmask)) while (!cpus_empty(waitmask))
blackfin_dcache_invalidate_range( blackfin_dcache_invalidate_range(
(unsigned long)(&msg->call_struct.waitmask), (unsigned long)(&waitmask),
(unsigned long)(&msg->call_struct.waitmask)); (unsigned long)(&waitmask));
#ifdef __ARCH_SYNC_CORE_DCACHE #ifdef __ARCH_SYNC_CORE_DCACHE
/* /*
* Invalidate D cache in case shared data was changed by * Invalidate D cache in case shared data was changed by
...@@ -230,8 +237,20 @@ int smp_call_function(void (*func)(void *info), void *info, int wait) ...@@ -230,8 +237,20 @@ int smp_call_function(void (*func)(void *info), void *info, int wait)
*/ */
resync_core_dcache(); resync_core_dcache();
#endif #endif
kfree(msg);
} }
}
int smp_call_function(void (*func)(void *info), void *info, int wait)
{
cpumask_t callmap;
callmap = cpu_online_map;
cpu_clear(smp_processor_id(), callmap);
if (cpus_empty(callmap))
return 0;
smp_send_message(callmap, BFIN_IPI_CALL_FUNC, func, info, wait);
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(smp_call_function); EXPORT_SYMBOL_GPL(smp_call_function);
...@@ -241,100 +260,39 @@ int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, ...@@ -241,100 +260,39 @@ int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
{ {
unsigned int cpu = cpuid; unsigned int cpu = cpuid;
cpumask_t callmap; cpumask_t callmap;
unsigned long flags;
struct ipi_message_queue *msg_queue;
struct ipi_message *msg;
if (cpu_is_offline(cpu)) if (cpu_is_offline(cpu))
return 0; return 0;
cpus_clear(callmap); cpus_clear(callmap);
cpu_set(cpu, callmap); cpu_set(cpu, callmap);
msg = kmalloc(sizeof(*msg), GFP_ATOMIC); smp_send_message(callmap, BFIN_IPI_CALL_FUNC, func, info, wait);
if (!msg)
return -ENOMEM;
INIT_LIST_HEAD(&msg->list);
msg->call_struct.func = func;
msg->call_struct.info = info;
msg->call_struct.wait = wait;
msg->call_struct.pending = callmap;
msg->call_struct.waitmask = callmap;
msg->type = BFIN_IPI_CALL_FUNC;
msg_queue = &per_cpu(ipi_msg_queue, cpu);
spin_lock_irqsave(&msg_queue->lock, flags);
list_add_tail(&msg->list, &msg_queue->head);
spin_unlock_irqrestore(&msg_queue->lock, flags);
platform_send_ipi_cpu(cpu);
if (wait) {
while (!cpus_empty(msg->call_struct.waitmask))
blackfin_dcache_invalidate_range(
(unsigned long)(&msg->call_struct.waitmask),
(unsigned long)(&msg->call_struct.waitmask));
#ifdef __ARCH_SYNC_CORE_DCACHE
/*
* Invalidate D cache in case shared data was changed by
* other processors to ensure cache coherence.
*/
resync_core_dcache();
#endif
kfree(msg);
}
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(smp_call_function_single); EXPORT_SYMBOL_GPL(smp_call_function_single);
void smp_send_reschedule(int cpu) void smp_send_reschedule(int cpu)
{ {
unsigned long flags; /* simply trigger an ipi */
struct ipi_message_queue *msg_queue;
struct ipi_message *msg;
if (cpu_is_offline(cpu)) if (cpu_is_offline(cpu))
return; return;
platform_send_ipi_cpu(cpu, IRQ_SUPPLE_0);
msg = kzalloc(sizeof(*msg), GFP_ATOMIC);
if (!msg)
return;
INIT_LIST_HEAD(&msg->list);
msg->type = BFIN_IPI_RESCHEDULE;
msg_queue = &per_cpu(ipi_msg_queue, cpu);
spin_lock_irqsave(&msg_queue->lock, flags);
list_add_tail(&msg->list, &msg_queue->head);
spin_unlock_irqrestore(&msg_queue->lock, flags);
platform_send_ipi_cpu(cpu);
return; return;
} }
void smp_send_stop(void) void smp_send_stop(void)
{ {
unsigned int cpu;
cpumask_t callmap; cpumask_t callmap;
unsigned long flags;
struct ipi_message_queue *msg_queue;
struct ipi_message *msg;
callmap = cpu_online_map; callmap = cpu_online_map;
cpu_clear(smp_processor_id(), callmap); cpu_clear(smp_processor_id(), callmap);
if (cpus_empty(callmap)) if (cpus_empty(callmap))
return; return;
msg = kzalloc(sizeof(*msg), GFP_ATOMIC); smp_send_message(callmap, BFIN_IPI_CPU_STOP, NULL, NULL, 0);
if (!msg)
return;
INIT_LIST_HEAD(&msg->list);
msg->type = BFIN_IPI_CPU_STOP;
for_each_cpu_mask(cpu, callmap) {
msg_queue = &per_cpu(ipi_msg_queue, cpu);
spin_lock_irqsave(&msg_queue->lock, flags);
list_add_tail(&msg->list, &msg_queue->head);
spin_unlock_irqrestore(&msg_queue->lock, flags);
platform_send_ipi_cpu(cpu);
}
return; return;
} }
...@@ -441,7 +399,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) ...@@ -441,7 +399,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
{ {
platform_prepare_cpus(max_cpus); platform_prepare_cpus(max_cpus);
ipi_queue_init(); ipi_queue_init();
platform_request_ipi(ipi_handler); platform_request_ipi(IRQ_SUPPLE_0, ipi_handler_int0);
platform_request_ipi(IRQ_SUPPLE_1, ipi_handler_int1);
} }
void __init smp_cpus_done(unsigned int max_cpus) void __init smp_cpus_done(unsigned int max_cpus)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment