Commit 21af2f02 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] per-cpu support inside modules (minimal)

From: Rusty Russell <rusty@rustcorp.com.au>

OK, this does the *minimum* required to support DEFINE_PER_CPU inside
modules.  If we decide to change kmalloc_percpu later, great, we can turf
this out.

Basically, this overallocates the per-cpu data area at boot to at least
PERCPU_ENOUGH_ROOM bytes if CONFIG_MODULES=y (the value is arch-specific and
defaults to 32k; I have only 7744 bytes of percpu data in my kernel here, so
that makes sense), and a special allocator in module.c dishes it out.
parent 32028c70
...@@ -887,3 +887,13 @@ module_arch_cleanup (struct module *mod) ...@@ -887,3 +887,13 @@ module_arch_cleanup (struct module *mod)
if (mod->arch.unwind) if (mod->arch.unwind)
unw_remove_unwind_table(mod->arch.unw_table); unw_remove_unwind_table(mod->arch.unw_table);
} }
#ifdef CONFIG_SMP
/*
 * Replicate a module's initial per-cpu image into each possible CPU's copy.
 *
 * @pcpudst: per-cpu base address; __per_cpu_offset[cpu] is added to it to
 *           reach the copy belonging to a given CPU.
 * @src:     initial contents to copy from.
 * @size:    number of bytes to copy.
 */
void percpu_modcopy(void *pcpudst, const void *src, unsigned long size)
{
	unsigned int cpu = 0;

	while (cpu < NR_CPUS) {
		/* CPUs that are not possible are skipped. */
		if (cpu_possible(cpu))
			memcpy(pcpudst + __per_cpu_offset[cpu], src, size);
		cpu++;
	}
}
#endif /* CONFIG_SMP */
...@@ -8,22 +8,25 @@ ...@@ -8,22 +8,25 @@
extern unsigned long __per_cpu_offset[NR_CPUS]; extern unsigned long __per_cpu_offset[NR_CPUS];
/* Separate out the type, so (int[3], foo) works. */ /* Separate out the type, so (int[3], foo) works. */
#ifndef MODULE
#define DEFINE_PER_CPU(type, name) \ #define DEFINE_PER_CPU(type, name) \
__attribute__((__section__(".data.percpu"))) __typeof__(type) name##__per_cpu __attribute__((__section__(".data.percpu"))) __typeof__(type) name##__per_cpu
#endif
/* var is in discarded region: offset to particular copy we want */ /* var is in discarded region: offset to particular copy we want */
#define per_cpu(var, cpu) (*RELOC_HIDE(&var##__per_cpu, __per_cpu_offset[cpu])) #define per_cpu(var, cpu) (*RELOC_HIDE(&var##__per_cpu, __per_cpu_offset[cpu]))
#define __get_cpu_var(var) per_cpu(var, smp_processor_id()) #define __get_cpu_var(var) per_cpu(var, smp_processor_id())
/*
 * Copy @size bytes from @src into every possible CPU's instance of the
 * per-cpu region whose base (offset-relative) address is @pcpudst.
 */
static inline void percpu_modcopy(void *pcpudst, const void *src,
				  unsigned long size)
{
	unsigned int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!cpu_possible(cpu))
			continue;

		/* __per_cpu_offset[cpu] relocates the base to this CPU's copy. */
		memcpy(pcpudst + __per_cpu_offset[cpu], src, size);
	}
}
#else /* ! SMP */ #else /* ! SMP */
/* Can't define per-cpu variables in modules. Sorry --RR */
#ifndef MODULE
#define DEFINE_PER_CPU(type, name) \ #define DEFINE_PER_CPU(type, name) \
__typeof__(type) name##__per_cpu __typeof__(type) name##__per_cpu
#endif
#define per_cpu(var, cpu) ((void)cpu, var##__per_cpu) #define per_cpu(var, cpu) ((void)cpu, var##__per_cpu)
#define __get_cpu_var(var) var##__per_cpu #define __get_cpu_var(var) var##__per_cpu
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
* Copyright (C) 2002-2003 Hewlett-Packard Co * Copyright (C) 2002-2003 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com> * David Mosberger-Tang <davidm@hpl.hp.com>
*/ */
#define PERCPU_ENOUGH_ROOM PERCPU_PAGE_SIZE
#ifdef __ASSEMBLY__ #ifdef __ASSEMBLY__
...@@ -19,15 +20,15 @@ ...@@ -19,15 +20,15 @@
extern unsigned long __per_cpu_offset[NR_CPUS]; extern unsigned long __per_cpu_offset[NR_CPUS];
#ifndef MODULE
#define DEFINE_PER_CPU(type, name) \ #define DEFINE_PER_CPU(type, name) \
__attribute__((__section__(".data.percpu"))) __typeof__(type) name##__per_cpu __attribute__((__section__(".data.percpu"))) __typeof__(type) name##__per_cpu
#endif
#define DECLARE_PER_CPU(type, name) extern __typeof__(type) name##__per_cpu #define DECLARE_PER_CPU(type, name) extern __typeof__(type) name##__per_cpu
#define __get_cpu_var(var) (var##__per_cpu) #define __get_cpu_var(var) (var##__per_cpu)
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
# define per_cpu(var, cpu) (*RELOC_HIDE(&var##__per_cpu, __per_cpu_offset[cpu])) # define per_cpu(var, cpu) (*RELOC_HIDE(&var##__per_cpu, __per_cpu_offset[cpu]))
extern void percpu_modcopy(void *pcpudst, const void *src, unsigned long size);
#else #else
# define per_cpu(var, cpu) ((void)cpu, __get_cpu_var(var)) # define per_cpu(var, cpu) ((void)cpu, __get_cpu_var(var))
#endif #endif
......
...@@ -247,6 +247,9 @@ struct module ...@@ -247,6 +247,9 @@ struct module
char *strtab; char *strtab;
#endif #endif
/* Per-cpu data. */
void *percpu;
/* The command line arguments (may be mangled). People like /* The command line arguments (may be mangled). People like
keeping pointers to this stuff */ keeping pointers to this stuff */
char *args; char *args;
......
...@@ -2,9 +2,15 @@ ...@@ -2,9 +2,15 @@
#define __LINUX_PERCPU_H #define __LINUX_PERCPU_H
#include <linux/spinlock.h> /* For preempt_disable() */ #include <linux/spinlock.h> /* For preempt_disable() */
#include <linux/slab.h> /* For kmalloc() */ #include <linux/slab.h> /* For kmalloc() */
#include <linux/smp.h>
#include <linux/string.h> /* For memset() */ #include <linux/string.h> /* For memset() */
#include <asm/percpu.h> #include <asm/percpu.h>
/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
#ifndef PERCPU_ENOUGH_ROOM
#define PERCPU_ENOUGH_ROOM 32768
#endif
/* Must be an lvalue. */ /* Must be an lvalue. */
#define get_cpu_var(var) (*({ preempt_disable(); &__get_cpu_var(var); })) #define get_cpu_var(var) (*({ preempt_disable(); &__get_cpu_var(var); }))
#define put_cpu_var(var) preempt_enable() #define put_cpu_var(var) preempt_enable()
......
...@@ -318,14 +318,16 @@ static void __init setup_per_cpu_areas(void) ...@@ -318,14 +318,16 @@ static void __init setup_per_cpu_areas(void)
/* Copy section for each CPU (we discard the original) */ /* Copy section for each CPU (we discard the original) */
size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
if (!size) #ifdef CONFIG_MODULES
return; if (size < PERCPU_ENOUGH_ROOM)
size = PERCPU_ENOUGH_ROOM;
#endif
ptr = alloc_bootmem(size * NR_CPUS); ptr = alloc_bootmem(size * NR_CPUS);
for (i = 0; i < NR_CPUS; i++, ptr += size) { for (i = 0; i < NR_CPUS; i++, ptr += size) {
__per_cpu_offset[i] = ptr - __per_cpu_start; __per_cpu_offset[i] = ptr - __per_cpu_start;
memcpy(ptr, __per_cpu_start, size); memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
} }
} }
#endif /* !__GENERIC_PER_CPU */ #endif /* !__GENERIC_PER_CPU */
......
...@@ -205,6 +205,167 @@ static struct module *find_module(const char *name) ...@@ -205,6 +205,167 @@ static struct module *find_module(const char *name)
return NULL; return NULL;
} }
#ifdef CONFIG_SMP
/*
 * Bookkeeping for the module per-cpu allocator.
 *
 * pcpu_num_used:      number of entries of pcpu_size[] currently in use.
 * pcpu_num_allocated: number of entries pcpu_size[] has room for; the
 *                     array is doubled by split_block() when it fills up.
 */
static unsigned int pcpu_num_used, pcpu_num_allocated;
/*
 * Size in bytes of each consecutive block, starting at __per_cpu_start.
 * A negative value means the block is in use (its size is the absolute
 * value).  Entry 0 describes the core kernel's own static per-cpu data.
 */
static int *pcpu_size;
/*
 * Split block @i so that it is exactly @size bytes, inserting a new block
 * at @i+1 that holds the remainder.
 *
 * The caller must have checked that block @i is at least @size bytes.
 * @size is taken as unsigned long (the type percpu_modalloc() passes) so
 * that requests of 64KiB or more are not silently truncated.
 *
 * Returns 1 on success, 0 if the size table needed to grow and the
 * allocation failed (the table is left unchanged in that case).
 */
static int split_block(unsigned int i, unsigned long size)
{
	/* Grow the table by doubling when there is no spare slot. */
	if (pcpu_num_used + 1 > pcpu_num_allocated) {
		int *new = kmalloc(sizeof(new[0]) * pcpu_num_allocated * 2,
				   GFP_KERNEL);
		if (!new)
			return 0;

		memcpy(new, pcpu_size, sizeof(new[0]) * pcpu_num_allocated);
		pcpu_num_allocated *= 2;
		kfree(pcpu_size);
		pcpu_size = new;
	}

	/* Open a slot at i+1 by shifting entries i.. up by one. */
	memmove(&pcpu_size[i+1], &pcpu_size[i],
		sizeof(pcpu_size[0]) * (pcpu_num_used - i));
	pcpu_num_used++;

	/* After the shift, entry i+1 holds the old size of block i. */
	pcpu_size[i+1] -= (int)size;
	pcpu_size[i] = (int)size;
	return 1;
}
/*
 * Return the size of a block given its pcpu_size[] entry: the entry's
 * magnitude, ignoring the sign.
 */
static inline unsigned int block_size(int val)
{
	return val < 0 ? -val : val;
}
/*
 * Start and end of the core kernel's static per-cpu data; the symbols are
 * created by linker magic.  Module per-cpu blocks are addressed relative
 * to __per_cpu_start.
 */
extern char __per_cpu_start[], __per_cpu_end[];
/*
 * Allocate @size bytes of per-cpu space for a module, aligned to @align.
 *
 * Walks the block table first-fit.  Any padding needed to satisfy the
 * alignment is handed to the preceding block, and the chosen block is
 * split when the leftover is larger than sizeof(unsigned long).
 *
 * @size:  bytes required.
 * @align: required alignment; must not exceed SMP_CACHE_BYTES.  The
 *         caller passes the section's sh_addralign, for which ELF defines
 *         0 to mean "no constraint", so 0 is treated as 1.
 *
 * Returns the per-cpu address (relative to __per_cpu_start, to be combined
 * with __per_cpu_offset[]) of the block, or NULL if nothing fits or the
 * block table could not be grown.
 */
static void *percpu_modalloc(unsigned long size, unsigned long align)
{
	unsigned long extra, avail;
	unsigned int i;
	void *ptr;

	/* ALIGN() with 0 would yield 0 and a bogus, huge padding value. */
	if (align == 0)
		align = 1;
	BUG_ON(align > SMP_CACHE_BYTES);

	ptr = __per_cpu_start;
	for (i = 0; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) {
		/* Extra for alignment requirement. */
		extra = ALIGN((unsigned long)ptr, align) - (unsigned long)ptr;
		BUG_ON(i == 0 && extra != 0);

		/* Negative means the block is already in use. */
		if (pcpu_size[i] < 0)
			continue;

		/* Fit check written so that extra + size cannot wrap. */
		avail = pcpu_size[i];
		if (avail < extra || avail - extra < size)
			continue;

		/*
		 * Transfer extra to previous block.  Skipped when there is
		 * no padding, which also keeps us from touching
		 * pcpu_size[-1] when i == 0.
		 */
		if (extra) {
			if (pcpu_size[i-1] < 0)
				pcpu_size[i-1] -= extra;
			else
				pcpu_size[i-1] += extra;
			pcpu_size[i] -= extra;
			ptr += extra;
		}

		/* Split block if warranted */
		if (pcpu_size[i] - size > sizeof(unsigned long))
			if (!split_block(i, size))
				return NULL;

		/* Mark allocated */
		pcpu_size[i] = -pcpu_size[i];
		return ptr;
	}

	printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n",
	       size);
	return NULL;
}
/*
 * Release a block previously handed out by percpu_modalloc().
 *
 * @freeme must be the start address of one of the blocks after block 0;
 * anything else hits BUG().  The freed block is merged with a free
 * neighbour on either side.
 */
static void percpu_modfree(void *freeme)
{
	/* Block 0 is the core kernel's own per-cpu data; start after it. */
	void *cursor = __per_cpu_start + block_size(pcpu_size[0]);
	unsigned int idx = 1;

	/* Walk the blocks until one starts exactly at freeme. */
	while (idx < pcpu_num_used && cursor != freeme) {
		cursor += block_size(pcpu_size[idx]);
		idx++;
	}

	if (idx < pcpu_num_used)
		/* Flip the sign: a used (negative) entry becomes free. */
		pcpu_size[idx] = -pcpu_size[idx];
	else
		BUG();

	/* Previous block free?  Fold this one into it and close the gap. */
	if (pcpu_size[idx - 1] >= 0) {
		pcpu_size[idx - 1] += pcpu_size[idx];
		pcpu_num_used--;
		memmove(&pcpu_size[idx], &pcpu_size[idx + 1],
			(pcpu_num_used - idx) * sizeof(pcpu_size[0]));
		idx--;
	}

	/* Next block free?  Absorb it and close the gap. */
	if (idx + 1 < pcpu_num_used && pcpu_size[idx + 1] >= 0) {
		pcpu_size[idx] += pcpu_size[idx + 1];
		pcpu_num_used--;
		memmove(&pcpu_size[idx + 1], &pcpu_size[idx + 2],
			(pcpu_num_used - (idx + 1)) * sizeof(pcpu_size[0]));
	}
}
/*
 * Look up the module's ".data.percpu" section.
 *
 * Returns whatever find_sec() reports for that section name; callers
 * treat 0 as "no per-cpu section".
 */
static unsigned int find_pcpusec(Elf_Ehdr *hdr,
				 Elf_Shdr *sechdrs,
				 const char *secstrings)
{
	unsigned int pcpuindex;

	pcpuindex = find_sec(hdr, sechdrs, secstrings, ".data.percpu");
	return pcpuindex;
}
/*
 * Set up the module per-cpu block table at boot.
 *
 * The table starts with two entries: block 0 is the core kernel's static
 * per-cpu data (marked used), block 1 is whatever remains of
 * PERCPU_ENOUGH_ROOM (free).  If the kernel's own data already exceeds
 * PERCPU_ENOUGH_ROOM, only block 0 is kept and modules get no room.
 *
 * Returns 0 on success, -ENOMEM if the table could not be allocated.  In
 * that case the table is left empty, so percpu_modalloc() finds no blocks
 * and returns NULL instead of dereferencing a NULL table.
 */
static int percpu_modinit(void)
{
	int *sizes;

	sizes = kmalloc(sizeof(sizes[0]) * 2, GFP_KERNEL);
	if (!sizes) {
		pcpu_num_used = 0;
		pcpu_num_allocated = 0;
		printk(KERN_ERR "No memory for per-cpu module allocator.\n");
		return -ENOMEM;
	}

	pcpu_size = sizes;
	pcpu_num_used = 2;
	pcpu_num_allocated = 2;

	/* Static in-kernel percpu data (used). */
	pcpu_size[0] = -ALIGN(__per_cpu_end-__per_cpu_start, SMP_CACHE_BYTES);
	/* Free room. */
	pcpu_size[1] = PERCPU_ENOUGH_ROOM + pcpu_size[0];
	if (pcpu_size[1] < 0) {
		printk(KERN_ERR "No per-cpu room for modules.\n");
		pcpu_num_used = 1;
	}

	return 0;
}
__initcall(percpu_modinit);
#else /* ... !CONFIG_SMP */
/*
 * !CONFIG_SMP stub: there is no module per-cpu pool, so every request
 * yields NULL regardless of @size and @align.
 */
static inline void *percpu_modalloc(unsigned long size, unsigned long align)
{
	(void)size;
	(void)align;

	return NULL;
}
/*
 * !CONFIG_SMP stub: the matching percpu_modalloc() stub only ever returns
 * NULL, so there is never anything to free; any call hits BUG().
 */
static inline void percpu_modfree(void *pcpuptr)
{
	(void)pcpuptr;

	BUG();
}
/*
 * !CONFIG_SMP stub: always reports section index 0, which callers treat
 * as "no per-cpu section".  None of the arguments are examined.
 */
static inline unsigned int find_pcpusec(Elf_Ehdr *hdr,
					Elf_Shdr *sechdrs,
					const char *secstrings)
{
	(void)hdr;
	(void)sechdrs;
	(void)secstrings;

	return 0;
}
/*
 * !CONFIG_SMP stub: nothing is copied.  find_pcpusec() returns 0 here, so
 * the section passed in should be section 0 and its size should be 0;
 * a non-zero @size trips BUG().
 */
static inline void percpu_modcopy(void *pcpudst, const void *src,
				  unsigned long size)
{
	(void)pcpudst;
	(void)src;

	if (size != 0)
		BUG();
}
#endif /* CONFIG_SMP */
#ifdef CONFIG_MODULE_UNLOAD #ifdef CONFIG_MODULE_UNLOAD
/* Init the unload section of the module. */ /* Init the unload section of the module. */
static void module_unload_init(struct module *mod) static void module_unload_init(struct module *mod)
...@@ -913,6 +1074,8 @@ static void free_module(struct module *mod) ...@@ -913,6 +1074,8 @@ static void free_module(struct module *mod)
/* This may be NULL, but that's OK */ /* This may be NULL, but that's OK */
module_free(mod, mod->module_init); module_free(mod, mod->module_init);
kfree(mod->args); kfree(mod->args);
if (mod->percpu)
percpu_modfree(mod->percpu);
/* Finally, free the core (containing the module structure) */ /* Finally, free the core (containing the module structure) */
module_free(mod, mod->module_core); module_free(mod, mod->module_core);
...@@ -939,10 +1102,11 @@ static int simplify_symbols(Elf_Shdr *sechdrs, ...@@ -939,10 +1102,11 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
unsigned int symindex, unsigned int symindex,
const char *strtab, const char *strtab,
unsigned int versindex, unsigned int versindex,
unsigned int pcpuindex,
struct module *mod) struct module *mod)
{ {
Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr; Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr;
unsigned long secbase;
unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym); unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym);
int ret = 0; int ret = 0;
...@@ -979,10 +1143,12 @@ static int simplify_symbols(Elf_Shdr *sechdrs, ...@@ -979,10 +1143,12 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
break; break;
default: default:
sym[i].st_value /* Divert to percpu allocation if a percpu var. */
= (unsigned long) if (sym[i].st_shndx == pcpuindex)
(sechdrs[sym[i].st_shndx].sh_addr secbase = (unsigned long)mod->percpu;
+ sym[i].st_value); else
secbase = sechdrs[sym[i].st_shndx].sh_addr;
sym[i].st_value += secbase;
break; break;
} }
} }
...@@ -1119,7 +1285,7 @@ static struct module *load_module(void __user *umod, ...@@ -1119,7 +1285,7 @@ static struct module *load_module(void __user *umod,
char *secstrings, *args, *modmagic, *strtab = NULL; char *secstrings, *args, *modmagic, *strtab = NULL;
unsigned int i, symindex = 0, strindex = 0, setupindex, exindex, unsigned int i, symindex = 0, strindex = 0, setupindex, exindex,
exportindex, modindex, obsparmindex, infoindex, gplindex, exportindex, modindex, obsparmindex, infoindex, gplindex,
crcindex, gplcrcindex, versindex; crcindex, gplcrcindex, versindex, pcpuindex;
long arglen; long arglen;
struct module *mod; struct module *mod;
long err = 0; long err = 0;
...@@ -1194,6 +1360,7 @@ static struct module *load_module(void __user *umod, ...@@ -1194,6 +1360,7 @@ static struct module *load_module(void __user *umod,
obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm"); obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm");
versindex = find_sec(hdr, sechdrs, secstrings, "__versions"); versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo"); infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
/* Don't keep modinfo section */ /* Don't keep modinfo section */
sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
...@@ -1250,6 +1417,17 @@ static struct module *load_module(void __user *umod, ...@@ -1250,6 +1417,17 @@ static struct module *load_module(void __user *umod,
if (err < 0) if (err < 0)
goto free_mod; goto free_mod;
if (pcpuindex) {
/* We have a special allocation for this section. */
mod->percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
sechdrs[pcpuindex].sh_addralign);
if (!mod->percpu) {
err = -ENOMEM;
goto free_mod;
}
sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
}
/* Determine total sizes, and put offsets in sh_entsize. For now /* Determine total sizes, and put offsets in sh_entsize. For now
this is done generically; there doesn't appear to be any this is done generically; there doesn't appear to be any
special cases for the architectures. */ special cases for the architectures. */
...@@ -1259,7 +1437,7 @@ static struct module *load_module(void __user *umod, ...@@ -1259,7 +1437,7 @@ static struct module *load_module(void __user *umod,
ptr = module_alloc(mod->core_size); ptr = module_alloc(mod->core_size);
if (!ptr) { if (!ptr) {
err = -ENOMEM; err = -ENOMEM;
goto free_mod; goto free_percpu;
} }
memset(ptr, 0, mod->core_size); memset(ptr, 0, mod->core_size);
mod->module_core = ptr; mod->module_core = ptr;
...@@ -1303,7 +1481,8 @@ static struct module *load_module(void __user *umod, ...@@ -1303,7 +1481,8 @@ static struct module *load_module(void __user *umod,
set_license(mod, get_modinfo(sechdrs, infoindex, "license")); set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
/* Fix up syms, so that st_value is a pointer to location. */ /* Fix up syms, so that st_value is a pointer to location. */
err = simplify_symbols(sechdrs, symindex, strtab, versindex, mod); err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex,
mod);
if (err < 0) if (err < 0)
goto cleanup; goto cleanup;
...@@ -1342,6 +1521,10 @@ static struct module *load_module(void __user *umod, ...@@ -1342,6 +1521,10 @@ static struct module *load_module(void __user *umod,
goto cleanup; goto cleanup;
} }
/* Finally, copy percpu area over. */
percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
sechdrs[pcpuindex].sh_size);
#ifdef CONFIG_KALLSYMS #ifdef CONFIG_KALLSYMS
mod->symtab = (void *)sechdrs[symindex].sh_addr; mod->symtab = (void *)sechdrs[symindex].sh_addr;
mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym);
...@@ -1383,6 +1566,9 @@ static struct module *load_module(void __user *umod, ...@@ -1383,6 +1566,9 @@ static struct module *load_module(void __user *umod,
module_free(mod, mod->module_init); module_free(mod, mod->module_init);
free_core: free_core:
module_free(mod, mod->module_core); module_free(mod, mod->module_core);
free_percpu:
if (mod->percpu)
percpu_modfree(mod->percpu);
free_mod: free_mod:
kfree(args); kfree(args);
free_hdr: free_hdr:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment