Commit 5fbfb18d authored by Nick Piggin's avatar Nick Piggin Committed by Linus Torvalds

Fix up possibly racy module refcounting

Module refcounting is implemented with a per-cpu counter for speed.
However there is a race when tallying the counter where a reference may
be taken by one CPU and released by another.  Reference count summation
may then see the decrement without having seen the previous increment,
leading to lower than expected count.  A module which never has its
actual reference drop below 1 may return a reference count of 0 due to
this race.

Module removal generally runs under stop_machine, which prevents this
race causing bugs due to removal of in-use modules.  However there are
other real bugs in module.c code and driver code (module_refcount is
exported) where the callers do not run under stop_machine.

Fix this by maintaining running per-cpu counters for the number of
module refcount increments and the number of refcount decrements.  The
increments are tallied after the decrements, so any decrement seen will
always have its corresponding increment counted.  The final refcount is
the difference of the total increments and decrements, preventing a
low-refcount from being returned.
Signed-off-by: default avatarNick Piggin <npiggin@suse.de>
Acked-by: default avatarRusty Russell <rusty@rustcorp.com.au>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 7da23b86
...@@ -368,7 +368,8 @@ struct module ...@@ -368,7 +368,8 @@ struct module
void (*exit)(void); void (*exit)(void);
struct module_ref { struct module_ref {
int count; unsigned int incs;
unsigned int decs;
} __percpu *refptr; } __percpu *refptr;
#endif #endif
...@@ -463,9 +464,9 @@ static inline void __module_get(struct module *module) ...@@ -463,9 +464,9 @@ static inline void __module_get(struct module *module)
{ {
if (module) { if (module) {
preempt_disable(); preempt_disable();
__this_cpu_inc(module->refptr->count); __this_cpu_inc(module->refptr->incs);
trace_module_get(module, _THIS_IP_, trace_module_get(module, _THIS_IP_,
__this_cpu_read(module->refptr->count)); __this_cpu_read(module->refptr->incs));
preempt_enable(); preempt_enable();
} }
} }
...@@ -478,11 +479,10 @@ static inline int try_module_get(struct module *module) ...@@ -478,11 +479,10 @@ static inline int try_module_get(struct module *module)
preempt_disable(); preempt_disable();
if (likely(module_is_live(module))) { if (likely(module_is_live(module))) {
__this_cpu_inc(module->refptr->count); __this_cpu_inc(module->refptr->incs);
trace_module_get(module, _THIS_IP_, trace_module_get(module, _THIS_IP_,
__this_cpu_read(module->refptr->count)); __this_cpu_read(module->refptr->incs));
} } else
else
ret = 0; ret = 0;
preempt_enable(); preempt_enable();
......
...@@ -521,11 +521,13 @@ static void module_unload_init(struct module *mod) ...@@ -521,11 +521,13 @@ static void module_unload_init(struct module *mod)
int cpu; int cpu;
INIT_LIST_HEAD(&mod->modules_which_use_me); INIT_LIST_HEAD(&mod->modules_which_use_me);
for_each_possible_cpu(cpu) for_each_possible_cpu(cpu) {
per_cpu_ptr(mod->refptr, cpu)->count = 0; per_cpu_ptr(mod->refptr, cpu)->incs = 0;
per_cpu_ptr(mod->refptr, cpu)->decs = 0;
}
/* Hold reference count during initialization. */ /* Hold reference count during initialization. */
__this_cpu_write(mod->refptr->count, 1); __this_cpu_write(mod->refptr->incs, 1);
/* Backwards compatibility macros put refcount during init. */ /* Backwards compatibility macros put refcount during init. */
mod->waiter = current; mod->waiter = current;
} }
...@@ -664,12 +666,28 @@ static int try_stop_module(struct module *mod, int flags, int *forced) ...@@ -664,12 +666,28 @@ static int try_stop_module(struct module *mod, int flags, int *forced)
unsigned int module_refcount(struct module *mod) unsigned int module_refcount(struct module *mod)
{ {
unsigned int total = 0; unsigned int incs = 0, decs = 0;
int cpu; int cpu;
for_each_possible_cpu(cpu) for_each_possible_cpu(cpu)
total += per_cpu_ptr(mod->refptr, cpu)->count; decs += per_cpu_ptr(mod->refptr, cpu)->decs;
return total; /*
* ensure the incs are added up after the decs.
* module_put ensures incs are visible before decs with smp_wmb.
*
* This 2-count scheme avoids the situation where the refcount
* for CPU0 is read, then CPU0 increments the module refcount,
* then CPU1 drops that refcount, then the refcount for CPU1 is
* read. We would record a decrement but not its corresponding
* increment so we would see a low count (disaster).
*
* Rare situation? But module_refcount can be preempted, and we
* might be tallying up 4096+ CPUs. So it is not impossible.
*/
smp_rmb();
for_each_possible_cpu(cpu)
incs += per_cpu_ptr(mod->refptr, cpu)->incs;
return incs - decs;
} }
EXPORT_SYMBOL(module_refcount); EXPORT_SYMBOL(module_refcount);
...@@ -846,10 +864,11 @@ void module_put(struct module *module) ...@@ -846,10 +864,11 @@ void module_put(struct module *module)
{ {
if (module) { if (module) {
preempt_disable(); preempt_disable();
__this_cpu_dec(module->refptr->count); smp_wmb(); /* see comment in module_refcount */
__this_cpu_inc(module->refptr->decs);
trace_module_put(module, _RET_IP_, trace_module_put(module, _RET_IP_,
__this_cpu_read(module->refptr->count)); __this_cpu_read(module->refptr->decs));
/* Maybe they're waiting for us to drop reference? */ /* Maybe they're waiting for us to drop reference? */
if (unlikely(!module_is_live(module))) if (unlikely(!module_is_live(module)))
wake_up_process(module->waiter); wake_up_process(module->waiter);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment