Commit 0fc0531e authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
  percpu: update comments to reflect that percpu allocations are always zero-filled
  percpu: Optimize __get_cpu_var()
  x86, percpu: Optimize this_cpu_ptr
  percpu: clear memory allocated with the km allocator
  percpu: fix build breakage on s390 and cleanup build configuration tests
  percpu: use percpu allocator on UP too
  percpu: reduce PCPU_MIN_UNIT_SIZE to 32k
  vmalloc: pcpu_get/free_vm_areas() aren't needed on UP

Fixed up trivial conflicts in include/linux/percpu.h
parents 91b74501 9329ba97
@@ -47,6 +47,20 @@
#ifdef CONFIG_SMP
#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
#define __my_cpu_offset percpu_read(this_cpu_off)
/*
* Compared to the generic __my_cpu_offset version, the following
* saves one instruction and avoids clobbering a temp register.
*/
#define __this_cpu_ptr(ptr) \
({ \
unsigned long tcp_ptr__; \
__verify_pcpu_ptr(ptr); \
asm volatile("add " __percpu_arg(1) ", %0" \
: "=r" (tcp_ptr__) \
: "m" (this_cpu_off), "0" (ptr)); \
(typeof(*(ptr)) __kernel __force *)tcp_ptr__; \
})
#else
#define __percpu_arg(x) "%P" #x
#endif
......
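The comment above is the whole point of the x86 override: the generic __this_cpu_ptr() first reads the per-cpu offset into a temporary register and then adds it to the pointer, while the x86 version folds the offset load into the add itself via a memory operand. Below is a minimal userspace sketch of the same inline-asm pattern, for illustration only and not part of the patch; the global fake_cpu_offset is a made-up stand-in for the %gs-based this_cpu_off, and the code is x86-specific.

#include <stdio.h>

static unsigned long fake_cpu_offset = 0x1000;	/* stand-in for this_cpu_off */

/* fused form: add the offset straight from its memory slot, one instruction */
static void *shift_ptr_fused(void *ptr)
{
	unsigned long tcp_ptr__;

	asm ("add %1, %0"
	     : "=r" (tcp_ptr__)
	     : "m" (fake_cpu_offset), "0" ((unsigned long)ptr));
	return (void *)tcp_ptr__;
}

/* generic shape: load the offset into a temporary, then add it */
static void *shift_ptr_generic(void *ptr)
{
	unsigned long off = fake_cpu_offset;

	return (void *)((unsigned long)ptr + off);
}

int main(void)
{
	int var;

	printf("fused:   %p\n", shift_ptr_fused(&var));
	printf("generic: %p\n", shift_ptr_generic(&var));
	return 0;
}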
@@ -55,14 +55,18 @@ extern unsigned long __per_cpu_offset[NR_CPUS];
*/
#define per_cpu(var, cpu) \
(*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu)))
#define __get_cpu_var(var) \
(*SHIFT_PERCPU_PTR(&(var), my_cpu_offset))
#define __raw_get_cpu_var(var) \
(*SHIFT_PERCPU_PTR(&(var), __my_cpu_offset))
-#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
+#ifndef __this_cpu_ptr
#define __this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset)
#endif
#ifdef CONFIG_DEBUG_PREEMPT
#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
#else
#define this_cpu_ptr(ptr) __this_cpu_ptr(ptr)
#endif
#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
#define __raw_get_cpu_var(var) (*__this_cpu_ptr(&(var)))
#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
extern void setup_per_cpu_areas(void);
......
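The effect of the reshuffle above is that __get_cpu_var() and __raw_get_cpu_var() are now defined in terms of this_cpu_ptr()/__this_cpu_ptr(), so one arch override of __this_cpu_ptr() (such as the x86 one earlier in this merge) speeds up all of them. A toy userspace model of that layering follows; it is a simplified assumption with the preemption checks and the real offset machinery stripped out, not the kernel's actual definitions.

#include <stdio.h>

static unsigned long my_cpu_offset;	/* stand-in for the real per-cpu offset */

#define SHIFT_PERCPU_PTR(ptr, off) \
	((typeof(ptr))((char *)(ptr) + (off)))

/* an arch may override __this_cpu_ptr(); everything else funnels through it */
#define __this_cpu_ptr(ptr)	SHIFT_PERCPU_PTR(ptr, my_cpu_offset)
#define this_cpu_ptr(ptr)	__this_cpu_ptr(ptr)
#define __get_cpu_var(var)	(*this_cpu_ptr(&(var)))
#define __raw_get_cpu_var(var)	(*__this_cpu_ptr(&(var)))

int main(void)
{
	static int counter;	/* pretend this is a per-cpu variable */

	__get_cpu_var(counter) = 42;
	printf("%d\n", __raw_get_cpu_var(counter));	/* prints 42 */
	return 0;
}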
@@ -48,10 +48,8 @@
preempt_enable(); \
} while (0)
#ifdef CONFIG_SMP
/* minimum unit size, also is the maximum supported allocation size */
-#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10)
+#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10)
/*
* Percpu allocator can serve percpu allocations before slab is
@@ -146,37 +144,20 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
* dynamically allocated. Non-atomic access to the current CPU's
* version should probably be combined with get_cpu()/put_cpu().
*/
#ifdef CONFIG_SMP
#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
#else
#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); })
#endif
extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align);
extern bool is_kernel_percpu_address(unsigned long addr);
-#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
+#if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
extern void __init setup_per_cpu_areas(void);
#endif
extern void __init percpu_init_late(void);
#else /* CONFIG_SMP */
#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); })
/* can't distinguish from other static vars, always false */
static inline bool is_kernel_percpu_address(unsigned long addr)
{
return false;
}
static inline void __init setup_per_cpu_areas(void) { }
static inline void __init percpu_init_late(void) { }
static inline void *pcpu_lpage_remapped(void *kaddr)
{
return NULL;
}
#endif /* CONFIG_SMP */
extern void __percpu *__alloc_percpu(size_t size, size_t align);
extern void free_percpu(void __percpu *__pdata);
extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
......
@@ -117,10 +117,12 @@ extern rwlock_t vmlist_lock;
extern struct vm_struct *vmlist;
extern __init void vm_area_register_early(struct vm_struct *vm, size_t align);
#ifdef CONFIG_SMP
struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
const size_t *sizes, int nr_vms,
size_t align, gfp_t gfp_mask);
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms);
#endif
#endif /* _LINUX_VMALLOC_H */
@@ -301,3 +301,11 @@ config NOMMU_INITIAL_TRIM_EXCESS
of 1 says that all excess pages should be trimmed.
See Documentation/nommu-mmap.txt for more information.
#
# UP and nommu archs use km based percpu allocator
#
config NEED_PER_CPU_KM
depends on !SMP
bool
default y
@@ -11,7 +11,7 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
maccess.o page_alloc.o page-writeback.o \
readahead.o swap.o truncate.o vmscan.o shmem.o \
prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
-page_isolation.o mm_init.o mmu_context.o \
+page_isolation.o mm_init.o mmu_context.o percpu.o \
$(mmu-y)
obj-y += init-mm.o
@@ -36,11 +36,6 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
ifdef CONFIG_SMP
obj-y += percpu.o
else
obj-y += percpu_up.o
endif
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
......
@@ -27,7 +27,7 @@
* chunk size is not aligned. percpu-km code will whine about it.
*/
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+#if defined(CONFIG_SMP) && defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
#error "contiguous percpu allocation is incompatible with paged first chunk"
#endif
@@ -35,7 +35,11 @@
static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
{
-/* noop */
+unsigned int cpu;
for_each_possible_cpu(cpu)
memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
return 0;
}
......
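With the percpu-km change above (and the matching comment updates to __alloc_percpu() further down), dynamically allocated percpu memory is zero-filled by every backend, so callers never need an explicit clear. The following is a hypothetical kernel-context sketch of the pattern this guarantees; it is not part of the patch, and example_counters_init() and the variable names are made up.

#include <linux/percpu.h>
#include <linux/errno.h>

static int example_counters_init(void)
{
	/* returned memory is already zeroed on every possible CPU */
	unsigned long __percpu *counters = alloc_percpu(unsigned long);

	if (!counters)
		return -ENOMEM;

	this_cpu_inc(*counters);	/* each CPU's slot starts from 0 */

	free_percpu(counters);
	return 0;
}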
@@ -76,6 +76,7 @@
#define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */
#define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */
#ifdef CONFIG_SMP
/* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */
#ifndef __addr_to_pcpu_ptr
#define __addr_to_pcpu_ptr(addr) \
@@ -89,6 +90,11 @@
(unsigned long)pcpu_base_addr - \
(unsigned long)__per_cpu_start)
#endif
#else /* CONFIG_SMP */
/* on UP, it's always identity mapped */
#define __addr_to_pcpu_ptr(addr) (void __percpu *)(addr)
#define __pcpu_ptr_to_addr(ptr) (void __force *)(ptr)
#endif /* CONFIG_SMP */
struct pcpu_chunk {
struct list_head list; /* linked to pcpu_slot lists */
@@ -820,8 +826,8 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved)
* @size: size of area to allocate in bytes
* @align: alignment of area (max PAGE_SIZE)
*
-* Allocate percpu area of @size bytes aligned at @align. Might
-* sleep. Might trigger writeouts.
+* Allocate zero-filled percpu area of @size bytes aligned at @align.
+* Might sleep. Might trigger writeouts.
*
* CONTEXT:
* Does GFP_KERNEL allocation.
@@ -840,9 +846,10 @@ EXPORT_SYMBOL_GPL(__alloc_percpu);
* @size: size of area to allocate in bytes
* @align: alignment of area (max PAGE_SIZE)
*
-* Allocate percpu area of @size bytes aligned at @align from reserved
-* percpu area if arch has set it up; otherwise, allocation is served
-* from the same dynamic area. Might sleep. Might trigger writeouts.
+* Allocate zero-filled percpu area of @size bytes aligned at @align
+* from reserved percpu area if arch has set it up; otherwise,
+* allocation is served from the same dynamic area. Might sleep.
+* Might trigger writeouts.
*
* CONTEXT:
* Does GFP_KERNEL allocation.
@@ -949,6 +956,7 @@ EXPORT_SYMBOL_GPL(free_percpu);
*/
bool is_kernel_percpu_address(unsigned long addr)
{
#ifdef CONFIG_SMP
const size_t static_size = __per_cpu_end - __per_cpu_start;
void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
unsigned int cpu;
@@ -959,6 +967,8 @@ bool is_kernel_percpu_address(unsigned long addr)
if ((void *)addr >= start && (void *)addr < start + static_size)
return true;
}
#endif
/* on UP, can't distinguish from other static vars, always false */
return false;
}
@@ -1066,161 +1076,6 @@ void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
free_bootmem(__pa(ai), ai->__ai_size);
}
/**
* pcpu_build_alloc_info - build alloc_info considering distances between CPUs
* @reserved_size: the size of reserved percpu area in bytes
* @dyn_size: minimum free size for dynamic allocation in bytes
* @atom_size: allocation atom size
* @cpu_distance_fn: callback to determine distance between cpus, optional
*
* This function determines grouping of units, their mappings to cpus
* and other parameters considering needed percpu size, allocation
* atom size and distances between CPUs.
*
* Groups are always mutliples of atom size and CPUs which are of
* LOCAL_DISTANCE both ways are grouped together and share space for
* units in the same group. The returned configuration is guaranteed
* to have CPUs on different nodes on different groups and >=75% usage
* of allocated virtual address space.
*
* RETURNS:
* On success, pointer to the new allocation_info is returned. On
* failure, ERR_PTR value is returned.
*/
static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
size_t reserved_size, size_t dyn_size,
size_t atom_size,
pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
{
static int group_map[NR_CPUS] __initdata;
static int group_cnt[NR_CPUS] __initdata;
const size_t static_size = __per_cpu_end - __per_cpu_start;
int nr_groups = 1, nr_units = 0;
size_t size_sum, min_unit_size, alloc_size;
int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */
int last_allocs, group, unit;
unsigned int cpu, tcpu;
struct pcpu_alloc_info *ai;
unsigned int *cpu_map;
/* this function may be called multiple times */
memset(group_map, 0, sizeof(group_map));
memset(group_cnt, 0, sizeof(group_cnt));
/* calculate size_sum and ensure dyn_size is enough for early alloc */
size_sum = PFN_ALIGN(static_size + reserved_size +
max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
dyn_size = size_sum - static_size - reserved_size;
/*
* Determine min_unit_size, alloc_size and max_upa such that
* alloc_size is multiple of atom_size and is the smallest
* which can accomodate 4k aligned segments which are equal to
* or larger than min_unit_size.
*/
min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
alloc_size = roundup(min_unit_size, atom_size);
upa = alloc_size / min_unit_size;
while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
upa--;
max_upa = upa;
/* group cpus according to their proximity */
for_each_possible_cpu(cpu) {
group = 0;
next_group:
for_each_possible_cpu(tcpu) {
if (cpu == tcpu)
break;
if (group_map[tcpu] == group && cpu_distance_fn &&
(cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
group++;
nr_groups = max(nr_groups, group + 1);
goto next_group;
}
}
group_map[cpu] = group;
group_cnt[group]++;
}
/*
* Expand unit size until address space usage goes over 75%
* and then as much as possible without using more address
* space.
*/
last_allocs = INT_MAX;
for (upa = max_upa; upa; upa--) {
int allocs = 0, wasted = 0;
if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
continue;
for (group = 0; group < nr_groups; group++) {
int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
allocs += this_allocs;
wasted += this_allocs * upa - group_cnt[group];
}
/*
* Don't accept if wastage is over 1/3. The
* greater-than comparison ensures upa==1 always
* passes the following check.
*/
if (wasted > num_possible_cpus() / 3)
continue;
/* and then don't consume more memory */
if (allocs > last_allocs)
break;
last_allocs = allocs;
best_upa = upa;
}
upa = best_upa;
/* allocate and fill alloc_info */
for (group = 0; group < nr_groups; group++)
nr_units += roundup(group_cnt[group], upa);
ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
if (!ai)
return ERR_PTR(-ENOMEM);
cpu_map = ai->groups[0].cpu_map;
for (group = 0; group < nr_groups; group++) {
ai->groups[group].cpu_map = cpu_map;
cpu_map += roundup(group_cnt[group], upa);
}
ai->static_size = static_size;
ai->reserved_size = reserved_size;
ai->dyn_size = dyn_size;
ai->unit_size = alloc_size / upa;
ai->atom_size = atom_size;
ai->alloc_size = alloc_size;
for (group = 0, unit = 0; group_cnt[group]; group++) {
struct pcpu_group_info *gi = &ai->groups[group];
/*
* Initialize base_offset as if all groups are located
* back-to-back. The caller should update this to
* reflect actual allocation.
*/
gi->base_offset = unit * ai->unit_size;
for_each_possible_cpu(cpu)
if (group_map[cpu] == group)
gi->cpu_map[gi->nr_units++] = cpu;
gi->nr_units = roundup(gi->nr_units, upa);
unit += gi->nr_units;
}
BUG_ON(unit != nr_units);
return ai;
}
/**
* pcpu_dump_alloc_info - print out information about pcpu_alloc_info
* @lvl: loglevel
@@ -1363,7 +1218,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
/* sanity checks */
PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
#ifdef CONFIG_SMP
PCPU_SETUP_BUG_ON(!ai->static_size);
#endif
PCPU_SETUP_BUG_ON(!base_addr);
PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
@@ -1488,6 +1345,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
return 0;
}
#ifdef CONFIG_SMP
const char *pcpu_fc_names[PCPU_FC_NR] __initdata = {
[PCPU_FC_AUTO] = "auto",
[PCPU_FC_EMBED] = "embed",
@@ -1515,8 +1374,180 @@ static int __init percpu_alloc_setup(char *str)
}
early_param("percpu_alloc", percpu_alloc_setup);
/*
* pcpu_embed_first_chunk() is used by the generic percpu setup.
* Build it if needed by the arch config or the generic setup is going
* to be used.
*/
#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \
!defined(CONFIG_HAVE_SETUP_PER_CPU_AREA)
#define BUILD_EMBED_FIRST_CHUNK
#endif
/* build pcpu_page_first_chunk() iff needed by the arch config */
#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK)
#define BUILD_PAGE_FIRST_CHUNK
#endif
/* pcpu_build_alloc_info() is used by both embed and page first chunk */
#if defined(BUILD_EMBED_FIRST_CHUNK) || defined(BUILD_PAGE_FIRST_CHUNK)
/**
* pcpu_build_alloc_info - build alloc_info considering distances between CPUs
* @reserved_size: the size of reserved percpu area in bytes
* @dyn_size: minimum free size for dynamic allocation in bytes
* @atom_size: allocation atom size
* @cpu_distance_fn: callback to determine distance between cpus, optional
*
* This function determines grouping of units, their mappings to cpus
* and other parameters considering needed percpu size, allocation
* atom size and distances between CPUs.
*
* Groups are always mutliples of atom size and CPUs which are of
* LOCAL_DISTANCE both ways are grouped together and share space for
* units in the same group. The returned configuration is guaranteed
* to have CPUs on different nodes on different groups and >=75% usage
* of allocated virtual address space.
*
* RETURNS:
* On success, pointer to the new allocation_info is returned. On
* failure, ERR_PTR value is returned.
*/
static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
size_t reserved_size, size_t dyn_size,
size_t atom_size,
pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
{
static int group_map[NR_CPUS] __initdata;
static int group_cnt[NR_CPUS] __initdata;
const size_t static_size = __per_cpu_end - __per_cpu_start;
int nr_groups = 1, nr_units = 0;
size_t size_sum, min_unit_size, alloc_size;
int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */
int last_allocs, group, unit;
unsigned int cpu, tcpu;
struct pcpu_alloc_info *ai;
unsigned int *cpu_map;
/* this function may be called multiple times */
memset(group_map, 0, sizeof(group_map));
memset(group_cnt, 0, sizeof(group_cnt));
/* calculate size_sum and ensure dyn_size is enough for early alloc */
size_sum = PFN_ALIGN(static_size + reserved_size +
max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
dyn_size = size_sum - static_size - reserved_size;
/*
* Determine min_unit_size, alloc_size and max_upa such that
* alloc_size is multiple of atom_size and is the smallest
* which can accomodate 4k aligned segments which are equal to
* or larger than min_unit_size.
*/
min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
alloc_size = roundup(min_unit_size, atom_size);
upa = alloc_size / min_unit_size;
while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
upa--;
max_upa = upa;
/* group cpus according to their proximity */
for_each_possible_cpu(cpu) {
group = 0;
next_group:
for_each_possible_cpu(tcpu) {
if (cpu == tcpu)
break;
if (group_map[tcpu] == group && cpu_distance_fn &&
(cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
group++;
nr_groups = max(nr_groups, group + 1);
goto next_group;
}
}
group_map[cpu] = group;
group_cnt[group]++;
}
/*
* Expand unit size until address space usage goes over 75%
* and then as much as possible without using more address
* space.
*/
last_allocs = INT_MAX;
for (upa = max_upa; upa; upa--) {
int allocs = 0, wasted = 0;
if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK))
continue;
for (group = 0; group < nr_groups; group++) {
int this_allocs = DIV_ROUND_UP(group_cnt[group], upa);
allocs += this_allocs;
wasted += this_allocs * upa - group_cnt[group];
}
/*
* Don't accept if wastage is over 1/3. The
* greater-than comparison ensures upa==1 always
* passes the following check.
*/
if (wasted > num_possible_cpus() / 3)
continue;
/* and then don't consume more memory */
if (allocs > last_allocs)
break;
last_allocs = allocs;
best_upa = upa;
}
upa = best_upa;
/* allocate and fill alloc_info */
for (group = 0; group < nr_groups; group++)
nr_units += roundup(group_cnt[group], upa);
ai = pcpu_alloc_alloc_info(nr_groups, nr_units);
if (!ai)
return ERR_PTR(-ENOMEM);
cpu_map = ai->groups[0].cpu_map;
for (group = 0; group < nr_groups; group++) {
ai->groups[group].cpu_map = cpu_map;
cpu_map += roundup(group_cnt[group], upa);
}
ai->static_size = static_size;
ai->reserved_size = reserved_size;
ai->dyn_size = dyn_size;
ai->unit_size = alloc_size / upa;
ai->atom_size = atom_size;
ai->alloc_size = alloc_size;
for (group = 0, unit = 0; group_cnt[group]; group++) {
struct pcpu_group_info *gi = &ai->groups[group];
/*
* Initialize base_offset as if all groups are located
* back-to-back. The caller should update this to
* reflect actual allocation.
*/
gi->base_offset = unit * ai->unit_size;
for_each_possible_cpu(cpu)
if (group_map[cpu] == group)
gi->cpu_map[gi->nr_units++] = cpu;
gi->nr_units = roundup(gi->nr_units, upa);
unit += gi->nr_units;
}
BUG_ON(unit != nr_units);
return ai;
}
#endif /* BUILD_EMBED_FIRST_CHUNK || BUILD_PAGE_FIRST_CHUNK */
#if defined(BUILD_EMBED_FIRST_CHUNK)
/**
* pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
* @reserved_size: the size of reserved percpu area in bytes
@@ -1645,10 +1676,9 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
free_bootmem(__pa(areas), areas_size);
return rc;
}
-#endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK ||
-!CONFIG_HAVE_SETUP_PER_CPU_AREA */
+#endif /* BUILD_EMBED_FIRST_CHUNK */
-#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+#ifdef BUILD_PAGE_FIRST_CHUNK
/**
* pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
* @reserved_size: the size of reserved percpu area in bytes
@@ -1756,10 +1786,11 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
pcpu_free_alloc_info(ai);
return rc;
}
-#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */
+#endif /* BUILD_PAGE_FIRST_CHUNK */
#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
/*
-* Generic percpu area setup.
+* Generic SMP percpu area setup.
*
* The embedding helper is used because its behavior closely resembles
* the original non-dynamic generic percpu area setup. This is
@@ -1770,7 +1801,6 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
* on the physical linear memory mapping which uses large page
* mappings on applicable archs.
*/
#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
@@ -1799,13 +1829,48 @@ void __init setup_per_cpu_areas(void)
PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL,
pcpu_dfl_fc_alloc, pcpu_dfl_fc_free);
if (rc < 0)
-panic("Failed to initialized percpu areas.");
+panic("Failed to initialize percpu areas.");
delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
for_each_possible_cpu(cpu)
__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
}
#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */
#else /* CONFIG_SMP */
/*
* UP percpu area setup.
*
* UP always uses km-based percpu allocator with identity mapping.
* Static percpu variables are indistinguishable from the usual static
* variables and don't require any special preparation.
*/
void __init setup_per_cpu_areas(void)
{
const size_t unit_size =
roundup_pow_of_two(max_t(size_t, PCPU_MIN_UNIT_SIZE,
PERCPU_DYNAMIC_RESERVE));
struct pcpu_alloc_info *ai;
void *fc;
ai = pcpu_alloc_alloc_info(1, 1);
fc = __alloc_bootmem(unit_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
if (!ai || !fc)
panic("Failed to allocate memory for percpu areas.");
ai->dyn_size = unit_size;
ai->unit_size = unit_size;
ai->atom_size = unit_size;
ai->alloc_size = unit_size;
ai->groups[0].nr_units = 1;
ai->groups[0].cpu_map[0] = 0;
if (pcpu_setup_first_chunk(ai, fc) < 0)
panic("Failed to initialize percpu areas.");
}
#endif /* CONFIG_SMP */
/*
* First and reserved chunks are initialized with temporary allocation
......
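The new UP setup_per_cpu_areas() above sizes its single first-chunk unit as roundup_pow_of_two(max(PCPU_MIN_UNIT_SIZE, PERCPU_DYNAMIC_RESERVE)). The standalone sketch below walks through that arithmetic with the new 32 KB minimum; the 20 KB value used for PERCPU_DYNAMIC_RESERVE is only an assumed example (the real value is config-dependent), and PFN_ALIGN is dropped for simplicity.

#include <stdio.h>

#define PCPU_MIN_UNIT_SIZE	(32 << 10)	/* reduced from 64 KB by this merge */
#define PERCPU_DYNAMIC_RESERVE	(20 << 10)	/* assumed example value */

static unsigned long roundup_pow_of_two(unsigned long x)
{
	unsigned long r = 1;

	while (r < x)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned long want = PCPU_MIN_UNIT_SIZE > PERCPU_DYNAMIC_RESERVE ?
			     PCPU_MIN_UNIT_SIZE : PERCPU_DYNAMIC_RESERVE;
	unsigned long unit = roundup_pow_of_two(want);

	/* 32 KB here: already a power of two and larger than the reserve */
	printf("UP first-chunk unit size: %lu KB\n", unit >> 10);
	return 0;
}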
/*
* mm/percpu_up.c - dummy percpu memory allocator implementation for UP
*/
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/slab.h>
void __percpu *__alloc_percpu(size_t size, size_t align)
{
/*
* Can't easily make larger alignment work with kmalloc. WARN
* on it. Larger alignment should only be used for module
* percpu sections on SMP for which this path isn't used.
*/
WARN_ON_ONCE(align > SMP_CACHE_BYTES);
return (void __percpu __force *)kzalloc(size, GFP_KERNEL);
}
EXPORT_SYMBOL_GPL(__alloc_percpu);
void free_percpu(void __percpu *p)
{
kfree(this_cpu_ptr(p));
}
EXPORT_SYMBOL_GPL(free_percpu);
phys_addr_t per_cpu_ptr_to_phys(void *addr)
{
return __pa(addr);
}
@@ -2065,6 +2065,7 @@ void free_vm_area(struct vm_struct *area)
}
EXPORT_SYMBOL_GPL(free_vm_area);
#ifdef CONFIG_SMP
static struct vmap_area *node_to_va(struct rb_node *n)
{
return n ? rb_entry(n, struct vmap_area, rb_node) : NULL;
@@ -2345,6 +2346,7 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
free_vm_area(vms[i]);
kfree(vms);
}
#endif /* CONFIG_SMP */
#ifdef CONFIG_PROC_FS
static void *s_start(struct seq_file *m, loff_t *pos)
......