Commit 3fc1f1e2 authored by Tejun Heo

stop_machine: reimplement using cpu_stop

Reimplement stop_machine using cpu_stop.  As cpu stoppers are
guaranteed to be available for all online cpus,
stop_machine_create/destroy() are no longer necessary and removed.

With resource management and synchronization handled by cpu_stop, the
new implementation is much simpler.  Asking cpu_stop to execute the
stop_machine_cpu_stop() state machine on all online cpus with cpu
hotplug disabled is enough.
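
For illustration only (my_fn and its call site below are hypothetical,
not part of this patch), a caller-side sketch of the simplified usage:

        static int my_fn(void *arg)
        {
                /* all online CPUs are spinning with interrupts disabled */
                return 0;
        }

        /* no stop_machine_create()/stop_machine_destroy() bracketing needed;
         * a NULL cpumask means my_fn runs on the first online CPU */
        int err = stop_machine(my_fn, NULL, NULL);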

stop_machine itself doesn't need to manage any global resources
anymore, so all per-instance information is rolled into struct
stop_machine_data and the mutex and all static data variables are
removed.
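
Condensed from the new __stop_machine() in the last hunk below, the
whole per-call setup is now an on-stack instance handed to cpu_stop:

        struct stop_machine_data smdata = { .fn = fn, .data = data,
                                            .num_threads = num_online_cpus(),
                                            .active_cpus = cpus };

        /* Set the initial state and stop all online cpus. */
        set_state(&smdata, STOPMACHINE_PREPARE);
        return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);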

The previous implementation created and destroyed RT workqueues as
necessary, which made stop_machine() calls highly expensive on very
large machines.  According to Dimitri Sivanich, avoiding this dynamic
creation/destruction makes booting more than twice as fast on very
large machines.  cpu_stop resources are preallocated for all online
cpus and should have the same effect.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
parent 1142d810
@@ -390,7 +390,6 @@ static void __init time_init_wq(void)
         if (time_sync_wq)
                 return;
         time_sync_wq = create_singlethread_workqueue("timesync");
-        stop_machine_create();
 }
 
 /*
...
@@ -80,12 +80,6 @@ static void do_suspend(void)
         shutting_down = SHUTDOWN_SUSPEND;
 
-        err = stop_machine_create();
-        if (err) {
-                printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err);
-                goto out;
-        }
-
 #ifdef CONFIG_PREEMPT
         /* If the kernel is preemptible, we need to freeze all the processes
            to prevent them from being in the middle of a pagetable update
@@ -93,7 +87,7 @@ static void do_suspend(void)
         err = freeze_processes();
         if (err) {
                 printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
-                goto out_destroy_sm;
+                goto out;
         }
 #endif
@@ -136,12 +130,8 @@ static void do_suspend(void)
 out_thaw:
 #ifdef CONFIG_PREEMPT
         thaw_processes();
-out_destroy_sm:
-#endif
-        stop_machine_destroy();
-
 out:
+#endif
         shutting_down = SHUTDOWN_INVALID;
 }
 #endif /* CONFIG_PM_SLEEP */
...
@@ -67,23 +67,6 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
  */
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
 
-/**
- * stop_machine_create: create all stop_machine threads
- *
- * Description: This causes all stop_machine threads to be created before
- * stop_machine actually gets called. This can be used by subsystems that
- * need a non failing stop_machine infrastructure.
- */
-int stop_machine_create(void);
-
-/**
- * stop_machine_destroy: destroy all stop_machine threads
- *
- * Description: This causes all stop_machine threads which were created with
- * stop_machine_create to be destroyed again.
- */
-void stop_machine_destroy(void);
-
 #else
 
 static inline int stop_machine(int (*fn)(void *), void *data,
@@ -96,8 +79,5 @@ static inline int stop_machine(int (*fn)(void *), void *data,
         return ret;
 }
 
-static inline int stop_machine_create(void) { return 0; }
-static inline void stop_machine_destroy(void) { }
-
 #endif /* CONFIG_SMP */
 #endif /* _LINUX_STOP_MACHINE */
@@ -266,9 +266,6 @@ int __ref cpu_down(unsigned int cpu)
 {
         int err;
 
-        err = stop_machine_create();
-        if (err)
-                return err;
-
         cpu_maps_update_begin();
 
         if (cpu_hotplug_disabled) {
@@ -280,7 +277,6 @@ int __ref cpu_down(unsigned int cpu)
 out:
         cpu_maps_update_done();
-        stop_machine_destroy();
         return err;
 }
 EXPORT_SYMBOL(cpu_down);
@@ -361,9 +357,6 @@ int disable_nonboot_cpus(void)
 {
         int cpu, first_cpu, error;
 
-        error = stop_machine_create();
-        if (error)
-                return error;
         cpu_maps_update_begin();
         first_cpu = cpumask_first(cpu_online_mask);
         /*
@@ -394,7 +387,6 @@ int disable_nonboot_cpus(void)
                 printk(KERN_ERR "Non-boot CPUs are not disabled\n");
         }
         cpu_maps_update_done();
-        stop_machine_destroy();
         return error;
 }
...
@@ -723,16 +723,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
                 return -EFAULT;
         name[MODULE_NAME_LEN-1] = '\0';
 
-        /* Create stop_machine threads since free_module relies on
-         * a non-failing stop_machine call. */
-        ret = stop_machine_create();
-        if (ret)
-                return ret;
-
-        if (mutex_lock_interruptible(&module_mutex) != 0) {
-                ret = -EINTR;
-                goto out_stop;
-        }
+        if (mutex_lock_interruptible(&module_mutex) != 0)
+                return -EINTR;
 
         mod = find_module(name);
         if (!mod) {
@@ -792,8 +784,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 out:
         mutex_unlock(&module_mutex);
-out_stop:
-        stop_machine_destroy();
         return ret;
 }
...
@@ -388,174 +388,92 @@ enum stopmachine_state {
         /* Exit */
         STOPMACHINE_EXIT,
 };
-static enum stopmachine_state state;
 
 struct stop_machine_data {
         int (*fn)(void *);
         void *data;
-        int fnret;
+        /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
+        unsigned int num_threads;
+        const struct cpumask *active_cpus;
+
+        enum stopmachine_state state;
+        atomic_t thread_ack;
 };
 
-/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
-static unsigned int num_threads;
-static atomic_t thread_ack;
-static DEFINE_MUTEX(lock);
-/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */
-static DEFINE_MUTEX(setup_lock);
-/* Users of stop_machine. */
-static int refcount;
-static struct workqueue_struct *stop_machine_wq;
-static struct stop_machine_data active, idle;
-static const struct cpumask *active_cpus;
-static void __percpu *stop_machine_work;
-
-static void set_state(enum stopmachine_state newstate)
+static void set_state(struct stop_machine_data *smdata,
+                      enum stopmachine_state newstate)
 {
         /* Reset ack counter. */
-        atomic_set(&thread_ack, num_threads);
+        atomic_set(&smdata->thread_ack, smdata->num_threads);
         smp_wmb();
-        state = newstate;
+        smdata->state = newstate;
 }
 
 /* Last one to ack a state moves to the next state. */
-static void ack_state(void)
+static void ack_state(struct stop_machine_data *smdata)
 {
-        if (atomic_dec_and_test(&thread_ack))
-                set_state(state + 1);
+        if (atomic_dec_and_test(&smdata->thread_ack))
+                set_state(smdata, smdata->state + 1);
 }
 
-/* This is the actual function which stops the CPU. It runs
- * in the context of a dedicated stopmachine workqueue. */
-static void stop_cpu(struct work_struct *unused)
+/* This is the cpu_stop function which stops the CPU. */
+static int stop_machine_cpu_stop(void *data)
 {
+        struct stop_machine_data *smdata = data;
         enum stopmachine_state curstate = STOPMACHINE_NONE;
-        struct stop_machine_data *smdata = &idle;
-        int cpu = smp_processor_id();
-        int err;
+        int cpu = smp_processor_id(), err = 0;
+        bool is_active;
+
+        if (!smdata->active_cpus)
+                is_active = cpu == cpumask_first(cpu_online_mask);
+        else
+                is_active = cpumask_test_cpu(cpu, smdata->active_cpus);
 
-        if (!active_cpus) {
-                if (cpu == cpumask_first(cpu_online_mask))
-                        smdata = &active;
-        } else {
-                if (cpumask_test_cpu(cpu, active_cpus))
-                        smdata = &active;
-        }
         /* Simple state machine */
         do {
                 /* Chill out and ensure we re-read stopmachine_state. */
                 cpu_relax();
-                if (state != curstate) {
-                        curstate = state;
+                if (smdata->state != curstate) {
+                        curstate = smdata->state;
                         switch (curstate) {
                         case STOPMACHINE_DISABLE_IRQ:
                                 local_irq_disable();
                                 hard_irq_disable();
                                 break;
                         case STOPMACHINE_RUN:
-                                /* On multiple CPUs only a single error code
-                                 * is needed to tell that something failed. */
-                                err = smdata->fn(smdata->data);
-                                if (err)
-                                        smdata->fnret = err;
+                                if (is_active)
+                                        err = smdata->fn(smdata->data);
                                 break;
                         default:
                                 break;
                         }
-                        ack_state();
+                        ack_state(smdata);
                 }
         } while (curstate != STOPMACHINE_EXIT);
 
         local_irq_enable();
+        return err;
 }
 
-/* Callback for CPUs which aren't supposed to do anything. */
-static int chill(void *unused)
-{
-        return 0;
-}
-
-int stop_machine_create(void)
-{
-        mutex_lock(&setup_lock);
-        if (refcount)
-                goto done;
-        stop_machine_wq = create_rt_workqueue("kstop");
-        if (!stop_machine_wq)
-                goto err_out;
-        stop_machine_work = alloc_percpu(struct work_struct);
-        if (!stop_machine_work)
-                goto err_out;
-done:
-        refcount++;
-        mutex_unlock(&setup_lock);
-        return 0;
-
-err_out:
-        if (stop_machine_wq)
-                destroy_workqueue(stop_machine_wq);
-        mutex_unlock(&setup_lock);
-        return -ENOMEM;
-}
-EXPORT_SYMBOL_GPL(stop_machine_create);
-
-void stop_machine_destroy(void)
-{
-        mutex_lock(&setup_lock);
-        refcount--;
-        if (refcount)
-                goto done;
-        destroy_workqueue(stop_machine_wq);
-        free_percpu(stop_machine_work);
-done:
-        mutex_unlock(&setup_lock);
-}
-EXPORT_SYMBOL_GPL(stop_machine_destroy);
-
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
-        struct work_struct *sm_work;
-        int i, ret;
+        struct stop_machine_data smdata = { .fn = fn, .data = data,
+                                            .num_threads = num_online_cpus(),
+                                            .active_cpus = cpus };
 
-        /* Set up initial state. */
-        mutex_lock(&lock);
-        num_threads = num_online_cpus();
-        active_cpus = cpus;
-        active.fn = fn;
-        active.data = data;
-        active.fnret = 0;
-        idle.fn = chill;
-        idle.data = NULL;
-
-        set_state(STOPMACHINE_PREPARE);
-
-        /* Schedule the stop_cpu work on all cpus: hold this CPU so one
-         * doesn't hit this CPU until we're ready. */
-        get_cpu();
-        for_each_online_cpu(i) {
-                sm_work = per_cpu_ptr(stop_machine_work, i);
-                INIT_WORK(sm_work, stop_cpu);
-                queue_work_on(i, stop_machine_wq, sm_work);
-        }
-        /* This will release the thread on our CPU. */
-        put_cpu();
-        flush_workqueue(stop_machine_wq);
-        ret = active.fnret;
-        mutex_unlock(&lock);
-        return ret;
+        /* Set the initial state and stop all online cpus. */
+        set_state(&smdata, STOPMACHINE_PREPARE);
+        return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
 }
 
 int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
         int ret;
 
-        ret = stop_machine_create();
-        if (ret)
-                return ret;
         /* No CPUs can come up or down during this. */
         get_online_cpus();
         ret = __stop_machine(fn, data, cpus);
         put_online_cpus();
-        stop_machine_destroy();
         return ret;
 }
 EXPORT_SYMBOL_GPL(stop_machine);