Commit 0edcf8d6 authored by Ingo Molnar

Merge branch 'tj-percpu' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into core/percpu

Conflicts:
	arch/x86/include/asm/pgtable.h
parents 87b20307 40150d37
......@@ -189,9 +189,21 @@ callback_init(void * kernel_end)
if (alpha_using_srm) {
static struct vm_struct console_remap_vm;
unsigned long vaddr = VMALLOC_START;
unsigned long nr_pages = 0;
unsigned long vaddr;
unsigned long i, j;
/* calculate needed size */
for (i = 0; i < crb->map_entries; ++i)
nr_pages += crb->map[i].count;
/* register the vm area */
console_remap_vm.flags = VM_ALLOC;
console_remap_vm.size = nr_pages << PAGE_SHIFT;
vm_area_register_early(&console_remap_vm, PAGE_SIZE);
vaddr = (unsigned long)console_remap_vm.addr;
/* Set up the third level PTEs and update the virtual
addresses of the CRB entries. */
for (i = 0; i < crb->map_entries; ++i) {
......@@ -213,12 +225,6 @@ callback_init(void * kernel_end)
vaddr += PAGE_SIZE;
}
}
/* Let vmalloc know that we've allocated some space. */
console_remap_vm.flags = VM_ALLOC;
console_remap_vm.addr = (void *) VMALLOC_START;
console_remap_vm.size = vaddr - VMALLOC_START;
vmlist = &console_remap_vm;
}
callback_init_done = 1;
......
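The Alpha hunk above replaces the old scheme of appending a hand-built entry to vmlist after the PTEs were wired up with the new vm_area_register_early(), which reserves the vmalloc-space range up front and therefore needs the size computed first. A minimal sketch of that pattern with a hypothetical area name (only vm_area_register_early() and the flags/size/addr handling come from this diff):

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

static struct vm_struct early_console_vm;	/* hypothetical example area */

static void __init reserve_early_area(unsigned long nr_pages)
{
	/* Only size and flags are filled in before registration. */
	early_console_vm.flags = VM_ALLOC;
	early_console_vm.size  = nr_pages << PAGE_SHIFT;
	vm_area_register_early(&early_console_vm, PAGE_SIZE);

	/* ->addr now holds the reserved vmalloc-space address; the
	 * caller sets up its own page tables for the range. */
}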
......@@ -181,7 +181,7 @@ source "kernel/Kconfig.preempt"
config QUICKLIST
def_bool y
config HAVE_ARCH_BOOTMEM_NODE
config HAVE_ARCH_BOOTMEM
def_bool n
config ARCH_HAVE_MEMORY_PRESENT
......
......@@ -135,6 +135,9 @@ config ARCH_HAS_CACHE_LINE_SIZE
config HAVE_SETUP_PER_CPU_AREA
def_bool y
config HAVE_DYNAMIC_PER_CPU_AREA
def_bool y
config HAVE_CPUMASK_OF_CPU_MAP
def_bool X86_64_SMP
......@@ -1122,7 +1125,7 @@ config NODES_SHIFT
Specify the maximum number of NUMA Nodes available on the target
system. Increases memory reserved to accommodate various tables.
config HAVE_ARCH_BOOTMEM_NODE
config HAVE_ARCH_BOOTMEM
def_bool y
depends on X86_32 && NUMA
......
......@@ -91,45 +91,12 @@ static inline int pfn_valid(int pfn)
#endif /* CONFIG_DISCONTIGMEM */
#ifdef CONFIG_NEED_MULTIPLE_NODES
/*
* Following are macros that are specific to this numa platform.
*/
#define reserve_bootmem(addr, size, flags) \
reserve_bootmem_node(NODE_DATA(0), (addr), (size), (flags))
#define alloc_bootmem(x) \
__alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_nopanic(x) \
__alloc_bootmem_node_nopanic(NODE_DATA(0), (x), SMP_CACHE_BYTES, \
__pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low(x) \
__alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, 0)
#define alloc_bootmem_pages(x) \
__alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_pages_nopanic(x) \
__alloc_bootmem_node_nopanic(NODE_DATA(0), (x), PAGE_SIZE, \
__pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low_pages(x) \
__alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0)
#define alloc_bootmem_node(pgdat, x) \
({ \
struct pglist_data __maybe_unused \
*__alloc_bootmem_node__pgdat = (pgdat); \
__alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, \
__pa(MAX_DMA_ADDRESS)); \
})
#define alloc_bootmem_pages_node(pgdat, x) \
({ \
struct pglist_data __maybe_unused \
*__alloc_bootmem_node__pgdat = (pgdat); \
__alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, \
__pa(MAX_DMA_ADDRESS)); \
})
#define alloc_bootmem_low_pages_node(pgdat, x) \
/* always use node 0 for bootmem on this numa platform */
#define alloc_bootmem_core(__bdata, size, align, goal, limit) \
({ \
struct pglist_data __maybe_unused \
*__alloc_bootmem_node__pgdat = (pgdat); \
__alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0); \
bootmem_data_t __maybe_unused * __abm_bdata_dummy = (__bdata); \
__alloc_bootmem_core(NODE_DATA(0)->bdata, \
(size), (align), (goal), (limit)); \
})
#endif /* CONFIG_NEED_MULTIPLE_NODES */
......
......@@ -43,6 +43,14 @@
#else /* ...!ASSEMBLY */
#include <linux/stringify.h>
#include <asm/sections.h>
#define __addr_to_pcpu_ptr(addr) \
(void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \
+ (unsigned long)__per_cpu_start)
#define __pcpu_ptr_to_addr(ptr) \
(void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \
- (unsigned long)__per_cpu_start)
#ifdef CONFIG_SMP
#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
......
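The __addr_to_pcpu_ptr()/__pcpu_ptr_to_addr() pair added above translates between a per-cpu pointer (an offset-style address based at __per_cpu_start) and the real storage inside the first chunk based at pcpu_base_addr. A standalone userspace illustration of the arithmetic, with invented stand-in values for the two kernel symbols:

#include <stdio.h>

/* Stand-ins for the kernel symbols; the values are invented for the demo. */
static unsigned long per_cpu_start = 0x1000;	/* __per_cpu_start */
static unsigned long pcpu_base     = 0x80000;	/* pcpu_base_addr  */

static unsigned long ptr_to_addr(unsigned long ptr)	/* __pcpu_ptr_to_addr */
{
	return ptr + pcpu_base - per_cpu_start;
}

static unsigned long addr_to_ptr(unsigned long addr)	/* __addr_to_pcpu_ptr */
{
	return addr - pcpu_base + per_cpu_start;
}

int main(void)
{
	unsigned long ptr = per_cpu_start + 0x40;	/* some percpu object */
	unsigned long addr = ptr_to_addr(ptr);

	/* prints: ptr 0x1040 -> addr 0x80040 -> ptr 0x1040 */
	printf("ptr %#lx -> addr %#lx -> ptr %#lx\n",
	       ptr, addr, addr_to_ptr(addr));
	return 0;
}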
......@@ -288,6 +288,8 @@ static inline int is_new_memtype_allowed(unsigned long flags,
return 1;
}
pmd_t *populate_extra_pmd(unsigned long vaddr);
pte_t *populate_extra_pte(unsigned long vaddr);
#endif /* __ASSEMBLY__ */
#ifdef CONFIG_X86_32
......
......@@ -601,7 +601,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
if (!data)
return -ENOMEM;
data->acpi_data = percpu_ptr(acpi_perf_data, cpu);
data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu);
per_cpu(drv_data, cpu) = data;
if (cpu_has(c, X86_FEATURE_CONSTANT_TSC))
......
......@@ -16,6 +16,7 @@
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <linux/percpu.h>
#include <asm/apic.h>
......@@ -55,13 +56,13 @@ static inline void print_stack_overflow(void) { }
union irq_ctx {
struct thread_info tinfo;
u32 stack[THREAD_SIZE/sizeof(u32)];
};
} __attribute__((aligned(PAGE_SIZE)));
static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx);
static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx);
static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, hardirq_stack);
static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, softirq_stack);
static void call_on_stack(void *func, void *stack)
{
......@@ -81,7 +82,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
u32 *isp, arg1, arg2;
curctx = (union irq_ctx *) current_thread_info();
irqctx = hardirq_ctx[smp_processor_id()];
irqctx = __get_cpu_var(hardirq_ctx);
/*
* this is where we switch to the IRQ stack. However, if we are
......@@ -125,34 +126,34 @@ void __cpuinit irq_ctx_init(int cpu)
{
union irq_ctx *irqctx;
if (hardirq_ctx[cpu])
if (per_cpu(hardirq_ctx, cpu))
return;
irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE];
irqctx = &per_cpu(hardirq_stack, cpu);
irqctx->tinfo.task = NULL;
irqctx->tinfo.exec_domain = NULL;
irqctx->tinfo.cpu = cpu;
irqctx->tinfo.preempt_count = HARDIRQ_OFFSET;
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
hardirq_ctx[cpu] = irqctx;
per_cpu(hardirq_ctx, cpu) = irqctx;
irqctx = (union irq_ctx *) &softirq_stack[cpu*THREAD_SIZE];
irqctx = &per_cpu(softirq_stack, cpu);
irqctx->tinfo.task = NULL;
irqctx->tinfo.exec_domain = NULL;
irqctx->tinfo.cpu = cpu;
irqctx->tinfo.preempt_count = 0;
irqctx->tinfo.addr_limit = MAKE_MM_SEG(0);
softirq_ctx[cpu] = irqctx;
per_cpu(softirq_ctx, cpu) = irqctx;
printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
cpu, hardirq_ctx[cpu], softirq_ctx[cpu]);
cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu));
}
void irq_ctx_exit(int cpu)
{
hardirq_ctx[cpu] = NULL;
per_cpu(hardirq_ctx, cpu) = NULL;
}
asmlinkage void do_softirq(void)
......@@ -169,7 +170,7 @@ asmlinkage void do_softirq(void)
if (local_softirq_pending()) {
curctx = current_thread_info();
irqctx = softirq_ctx[smp_processor_id()];
irqctx = __get_cpu_var(softirq_ctx);
irqctx->tinfo.task = curctx->task;
irqctx->tinfo.previous_esp = current_stack_pointer;
......
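The irq_32.c changes above replace NR_CPUS-indexed arrays with per-cpu variables. The general conversion pattern, sketched with generic names rather than the actual irq context structures:

#include <linux/percpu.h>

struct foo_ctx;					/* placeholder type */

/* was: static struct foo_ctx *foo_ctx[NR_CPUS] __read_mostly; */
static DEFINE_PER_CPU(struct foo_ctx *, foo_ctx_ptr);

static void set_cpu_ctx(int cpu, struct foo_ctx *ctx)
{
	per_cpu(foo_ctx_ptr, cpu) = ctx;	/* was: foo_ctx[cpu] = ctx */
}

static struct foo_ctx *current_cpu_ctx(void)
{
	/* was: foo_ctx[smp_processor_id()]; caller must not be preemptible */
	return __get_cpu_var(foo_ctx_ptr);
}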
......@@ -137,6 +137,23 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
return pte_offset_kernel(pmd, 0);
}
pmd_t * __init populate_extra_pmd(unsigned long vaddr)
{
int pgd_idx = pgd_index(vaddr);
int pmd_idx = pmd_index(vaddr);
return one_md_table_init(swapper_pg_dir + pgd_idx) + pmd_idx;
}
pte_t * __init populate_extra_pte(unsigned long vaddr)
{
int pte_idx = pte_index(vaddr);
pmd_t *pmd;
pmd = populate_extra_pmd(vaddr);
return one_page_table_init(pmd) + pte_idx;
}
static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
unsigned long vaddr, pte_t *lastpte)
{
......
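populate_extra_pmd()/populate_extra_pte() give early boot code (notably the per-cpu first-chunk setup) a way to materialize kernel page tables for an arbitrary virtual address. A hedged sketch of a hypothetical caller; only the two helpers and the swapper_pg_dir walk come from this diff:

#include <linux/init.h>
#include <asm/pgtable.h>

/* Map one page of physical memory at vaddr in the kernel page tables.
 * populate_extra_pte() creates any missing pmd/pte levels under
 * swapper_pg_dir and returns the pte slot for vaddr. */
static void __init map_one_kernel_page(unsigned long vaddr, unsigned long pfn)
{
	pte_t *pte = populate_extra_pte(vaddr);

	set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
}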
......@@ -168,34 +168,51 @@ static __ref void *spp_getpage(void)
return ptr;
}
void
set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
static pud_t * __init fill_pud(pgd_t *pgd, unsigned long vaddr)
{
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
if (pgd_none(*pgd)) {
pud_t *pud = (pud_t *)spp_getpage();
pgd_populate(&init_mm, pgd, pud);
if (pud != pud_offset(pgd, 0))
printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
pud, pud_offset(pgd, 0));
}
return pud_offset(pgd, vaddr);
}
pud = pud_page + pud_index(vaddr);
static pmd_t * __init fill_pmd(pud_t *pud, unsigned long vaddr)
{
if (pud_none(*pud)) {
pmd = (pmd_t *) spp_getpage();
pmd_t *pmd = (pmd_t *) spp_getpage();
pud_populate(&init_mm, pud, pmd);
if (pmd != pmd_offset(pud, 0)) {
if (pmd != pmd_offset(pud, 0))
printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
pmd, pmd_offset(pud, 0));
return;
}
pmd, pmd_offset(pud, 0));
}
pmd = pmd_offset(pud, vaddr);
return pmd_offset(pud, vaddr);
}
static pte_t * __init fill_pte(pmd_t *pmd, unsigned long vaddr)
{
if (pmd_none(*pmd)) {
pte = (pte_t *) spp_getpage();
pte_t *pte = (pte_t *) spp_getpage();
pmd_populate_kernel(&init_mm, pmd, pte);
if (pte != pte_offset_kernel(pmd, 0)) {
if (pte != pte_offset_kernel(pmd, 0))
printk(KERN_ERR "PAGETABLE BUG #02!\n");
return;
}
}
return pte_offset_kernel(pmd, vaddr);
}
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
{
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pud = pud_page + pud_index(vaddr);
pmd = fill_pmd(pud, vaddr);
pte = fill_pte(pmd, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
set_pte(pte, new_pte);
/*
......@@ -205,8 +222,7 @@ set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
__flush_tlb_one(vaddr);
}
void
set_pte_vaddr(unsigned long vaddr, pte_t pteval)
void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
{
pgd_t *pgd;
pud_t *pud_page;
......@@ -223,6 +239,24 @@ set_pte_vaddr(unsigned long vaddr, pte_t pteval)
set_pte_vaddr_pud(pud_page, vaddr, pteval);
}
pmd_t * __init populate_extra_pmd(unsigned long vaddr)
{
pgd_t *pgd;
pud_t *pud;
pgd = pgd_offset_k(vaddr);
pud = fill_pud(pgd, vaddr);
return fill_pmd(pud, vaddr);
}
pte_t * __init populate_extra_pte(unsigned long vaddr)
{
pmd_t *pmd;
pmd = populate_extra_pmd(vaddr);
return fill_pte(pmd, vaddr);
}
/*
* Create large page table mappings for a range of physical addresses.
*/
......
......@@ -363,7 +363,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (!bt->sequence)
goto err;
bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG);
bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char));
if (!bt->msg_data)
goto err;
......
......@@ -516,12 +516,12 @@ int acpi_processor_preregister_performance(
continue;
}
if (!performance || !percpu_ptr(performance, i)) {
if (!performance || !per_cpu_ptr(performance, i)) {
retval = -EINVAL;
continue;
}
pr->performance = percpu_ptr(performance, i);
pr->performance = per_cpu_ptr(performance, i);
cpumask_set_cpu(i, pr->performance->shared_cpu_map);
if (acpi_processor_get_psd(pr)) {
retval = -EINVAL;
......
......@@ -65,23 +65,20 @@ extern void free_bootmem(unsigned long addr, unsigned long size);
#define BOOTMEM_DEFAULT 0
#define BOOTMEM_EXCLUSIVE (1<<0)
extern int reserve_bootmem(unsigned long addr,
unsigned long size,
int flags);
extern int reserve_bootmem_node(pg_data_t *pgdat,
unsigned long physaddr,
unsigned long size,
int flags);
#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags);
#endif
unsigned long physaddr,
unsigned long size,
int flags);
extern void *__alloc_bootmem_nopanic(unsigned long size,
extern void *__alloc_bootmem(unsigned long size,
unsigned long align,
unsigned long goal);
extern void *__alloc_bootmem(unsigned long size,
extern void *__alloc_bootmem_nopanic(unsigned long size,
unsigned long align,
unsigned long goal);
extern void *__alloc_bootmem_low(unsigned long size,
unsigned long align,
unsigned long goal);
extern void *__alloc_bootmem_node(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
......@@ -90,30 +87,35 @@ extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
unsigned long goal);
extern void *__alloc_bootmem_low(unsigned long size,
unsigned long align,
unsigned long goal);
extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
unsigned long goal);
#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
#define alloc_bootmem(x) \
__alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_nopanic(x) \
__alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low(x) \
__alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
#define alloc_bootmem_pages(x) \
__alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_pages_nopanic(x) \
__alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low_pages(x) \
__alloc_bootmem_low(x, PAGE_SIZE, 0)
#define alloc_bootmem_node(pgdat, x) \
__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_pages_node(pgdat, x) \
__alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_pages_node_nopanic(pgdat, x) \
__alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low(x) \
__alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
#define alloc_bootmem_low_pages(x) \
__alloc_bootmem_low(x, PAGE_SIZE, 0)
#define alloc_bootmem_low_pages_node(pgdat, x) \
__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
extern int reserve_bootmem_generic(unsigned long addr, unsigned long size,
int flags);
......
......@@ -76,52 +76,98 @@
#ifdef CONFIG_SMP
#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
/* minimum unit size, also is the maximum supported allocation size */
#define PCPU_MIN_UNIT_SIZE (16UL << PAGE_SHIFT)
/*
* PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy
* back on the first chunk if arch is manually allocating and mapping
* it for faster access (as a part of large page mapping for example).
* Note that dynamic percpu allocator covers both static and dynamic
* areas, so these values are bigger than PERCPU_MODULE_RESERVE.
*
* On typical configuration with modules, the following values leave
* about 8k of free space on the first chunk after boot on both x86_32
* and 64 when module support is enabled. When module support is
* disabled, it's much tighter.
*/
#ifndef PERCPU_DYNAMIC_RESERVE
# if BITS_PER_LONG > 32
# ifdef CONFIG_MODULES
# define PERCPU_DYNAMIC_RESERVE (6 << PAGE_SHIFT)
# else
# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT)
# endif
# else
# ifdef CONFIG_MODULES
# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT)
# else
# define PERCPU_DYNAMIC_RESERVE (2 << PAGE_SHIFT)
# endif
# endif
#endif /* PERCPU_DYNAMIC_RESERVE */
extern void *pcpu_base_addr;
typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno);
typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr);
extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
size_t static_size, size_t unit_size,
size_t free_size, void *base_addr,
pcpu_populate_pte_fn_t populate_pte_fn);
/*
* Use this to get to a cpu's version of the per-cpu object
* dynamically allocated. Non-atomic access to the current CPU's
* version should probably be combined with get_cpu()/put_cpu().
*/
#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu)))
#else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
struct percpu_data {
void *ptrs[1];
};
#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
/*
* Use this to get to a cpu's version of the per-cpu object dynamically
* allocated. Non-atomic access to the current CPU's version should
* probably be combined with get_cpu()/put_cpu().
*/
#define percpu_ptr(ptr, cpu) \
({ \
struct percpu_data *__p = __percpu_disguise(ptr); \
(__typeof__(ptr))__p->ptrs[(cpu)]; \
#define per_cpu_ptr(ptr, cpu) \
({ \
struct percpu_data *__p = __percpu_disguise(ptr); \
(__typeof__(ptr))__p->ptrs[(cpu)]; \
})
extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask);
extern void percpu_free(void *__pdata);
#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
extern void *__alloc_percpu(size_t size, size_t align);
extern void free_percpu(void *__pdata);
#else /* CONFIG_SMP */
#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
static __always_inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
static inline void *__alloc_percpu(size_t size, size_t align)
{
/*
* Can't easily make larger alignment work with kmalloc. WARN
* on it. Larger alignment should only be used for module
* percpu sections on SMP for which this path isn't used.
*/
WARN_ON_ONCE(align > __alignof__(unsigned long long));
return kzalloc(size, gfp);
}
static inline void percpu_free(void *__pdata)
static inline void free_percpu(void *p)
{
kfree(__pdata);
kfree(p);
}
#endif /* CONFIG_SMP */
#define percpu_alloc_mask(size, gfp, mask) \
__percpu_alloc_mask((size), (gfp), &(mask))
#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map)
/* (legacy) interface for use without CPU hotplug handling */
#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \
cpu_possible_map)
#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type))
#define free_percpu(ptr) percpu_free((ptr))
#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu))
#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \
__alignof__(type))
#endif /* __LINUX_PERCPU_H */
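After this header change there is a single dynamic per-cpu API regardless of whether the arch provides CONFIG_HAVE_DYNAMIC_PER_CPU_AREA: __alloc_percpu(size, align) and free_percpu(), with alloc_percpu(type) and per_cpu_ptr() on top, replacing the old percpu_alloc_mask()/percpu_free()/percpu_ptr() trio. A short usage sketch with hypothetical struct and variable names:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/percpu.h>

struct my_stats {				/* hypothetical example */
	unsigned long packets;
	unsigned long bytes;
};

static struct my_stats *stats;

static int __init stats_init(void)
{
	int cpu;

	stats = alloc_percpu(struct my_stats);	/* zeroed on every CPU */
	if (!stats)
		return -ENOMEM;

	for_each_possible_cpu(cpu)
		per_cpu_ptr(stats, cpu)->packets = 0;	/* per-CPU access */
	return 0;
}

static void stats_exit(void)
{
	free_percpu(stats);
}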
......@@ -95,6 +95,9 @@ extern struct vm_struct *remove_vm_area(const void *addr);
extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
struct page ***pages);
extern int map_kernel_range_noflush(unsigned long start, unsigned long size,
pgprot_t prot, struct page **pages);
extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size);
extern void unmap_kernel_range(unsigned long addr, unsigned long size);
/* Allocate/destroy a 'vmalloc' VM area. */
......@@ -110,5 +113,6 @@ extern long vwrite(char *buf, char *addr, unsigned long count);
*/
extern rwlock_t vmlist_lock;
extern struct vm_struct *vmlist;
extern __init void vm_area_register_early(struct vm_struct *vm, size_t align);
#endif /* _LINUX_VMALLOC_H */
......@@ -51,6 +51,7 @@
#include <linux/tracepoint.h>
#include <linux/ftrace.h>
#include <linux/async.h>
#include <linux/percpu.h>
#if 0
#define DEBUGP printk
......@@ -366,6 +367,34 @@ static struct module *find_module(const char *name)
}
#ifdef CONFIG_SMP
#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
static void *percpu_modalloc(unsigned long size, unsigned long align,
const char *name)
{
void *ptr;
if (align > PAGE_SIZE) {
printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
name, align, PAGE_SIZE);
align = PAGE_SIZE;
}
ptr = __alloc_percpu(size, align);
if (!ptr)
printk(KERN_WARNING
"Could not allocate %lu bytes percpu data\n", size);
return ptr;
}
static void percpu_modfree(void *freeme)
{
free_percpu(freeme);
}
#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
/* Number of blocks used and allocated. */
static unsigned int pcpu_num_used, pcpu_num_allocated;
/* Size of each block. -ve means used. */
......@@ -480,21 +509,6 @@ static void percpu_modfree(void *freeme)
}
}
static unsigned int find_pcpusec(Elf_Ehdr *hdr,
Elf_Shdr *sechdrs,
const char *secstrings)
{
return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
}
static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size)
{
int cpu;
for_each_possible_cpu(cpu)
memcpy(pcpudest + per_cpu_offset(cpu), from, size);
}
static int percpu_modinit(void)
{
pcpu_num_used = 2;
......@@ -513,7 +527,26 @@ static int percpu_modinit(void)
return 0;
}
__initcall(percpu_modinit);
#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
static unsigned int find_pcpusec(Elf_Ehdr *hdr,
Elf_Shdr *sechdrs,
const char *secstrings)
{
return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
}
static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size)
{
int cpu;
for_each_possible_cpu(cpu)
memcpy(pcpudest + per_cpu_offset(cpu), from, size);
}
#else /* ... !CONFIG_SMP */
static inline void *percpu_modalloc(unsigned long size, unsigned long align,
const char *name)
{
......@@ -535,6 +568,7 @@ static inline void percpu_modcopy(void *pcpudst, const void *src,
/* pcpusec should be 0, and size of that section should be 0. */
BUG_ON(size != 0);
}
#endif /* CONFIG_SMP */
#define MODINFO_ATTR(field) \
......
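In kernel/module.c the dynamic-allocator path simply hands the module's .data.percpu section to __alloc_percpu() and replicates the section image to every CPU with percpu_modcopy(). A hedged, simplified sketch of that load-time sequence, assuming it lives next to the helpers in the hunk above (the real logic sits inside load_module() and defers the copy until after relocation):

static void *setup_module_percpu(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
				 const char *secstrings, const char *modname)
{
	unsigned int pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
	void *percpu = NULL;

	if (pcpuindex) {
		/* allocate one copy per possible CPU ... */
		percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
					 sechdrs[pcpuindex].sh_addralign,
					 modname);
		/* ... and replicate the section's initial image */
		if (percpu)
			percpu_modcopy(percpu,
				       (void *)sechdrs[pcpuindex].sh_addr,
				       sechdrs[pcpuindex].sh_size);
	}
	return percpu;
}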
......@@ -9476,7 +9476,7 @@ cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
{
u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
u64 data;
#ifndef CONFIG_64BIT
......@@ -9495,7 +9495,7 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
{
u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
#ifndef CONFIG_64BIT
/*
......@@ -9591,7 +9591,7 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
ca = task_ca(tsk);
for (; ca; ca = ca->parent) {
u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
*cpuusage += cputime;
}
}
......
......@@ -170,7 +170,7 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
* doesn't hit this CPU until we're ready. */
get_cpu();
for_each_online_cpu(i) {
sm_work = percpu_ptr(stop_machine_work, i);
sm_work = per_cpu_ptr(stop_machine_work, i);
INIT_WORK(sm_work, stop_cpu);
queue_work_on(i, stop_machine_wq, sm_work);
}
......
......@@ -30,6 +30,10 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
obj-$(CONFIG_SMP) += percpu.o
else
obj-$(CONFIG_SMP) += allocpercpu.o
endif
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
......@@ -99,45 +99,51 @@ static int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
__percpu_populate_mask((__pdata), (size), (gfp), &(mask))
/**
* percpu_alloc_mask - initial setup of per-cpu data
* alloc_percpu - initial setup of per-cpu data
* @size: size of per-cpu object
* @gfp: may sleep or not etc.
* @mask: populate per-data for cpu's selected through mask bits
* @align: alignment
*
* Populating per-cpu data for all online cpu's would be a typical use case,
* which is simplified by the percpu_alloc() wrapper.
* Per-cpu objects are populated with zeroed buffers.
* Allocate dynamic percpu area. Percpu objects are populated with
* zeroed buffers.
*/
void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
void *__alloc_percpu(size_t size, size_t align)
{
/*
* We allocate whole cache lines to avoid false sharing
*/
size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size());
void *pdata = kzalloc(sz, gfp);
void *pdata = kzalloc(sz, GFP_KERNEL);
void *__pdata = __percpu_disguise(pdata);
/*
* Can't easily make larger alignment work with kmalloc. WARN
* on it. Larger alignment should only be used for module
* percpu sections on SMP for which this path isn't used.
*/
WARN_ON_ONCE(align > __alignof__(unsigned long long));
if (unlikely(!pdata))
return NULL;
if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask)))
if (likely(!__percpu_populate_mask(__pdata, size, GFP_KERNEL,
&cpu_possible_map)))
return __pdata;
kfree(pdata);
return NULL;
}
EXPORT_SYMBOL_GPL(__percpu_alloc_mask);
EXPORT_SYMBOL_GPL(__alloc_percpu);
/**
* percpu_free - final cleanup of per-cpu data
* free_percpu - final cleanup of per-cpu data
* @__pdata: object to clean up
*
* We simply clean up any per-cpu object left. No need for the client to
track and specify through a bitmask which per-cpu objects are to be freed.
*/
void percpu_free(void *__pdata)
void free_percpu(void *__pdata)
{
if (unlikely(!__pdata))
return;
__percpu_depopulate_mask(__pdata, &cpu_possible_map);
kfree(__percpu_disguise(__pdata));
}
EXPORT_SYMBOL_GPL(percpu_free);
EXPORT_SYMBOL_GPL(free_percpu);
......@@ -37,6 +37,16 @@ static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
static int bootmem_debug;
/*
* If an arch needs to apply workarounds to bootmem allocation, it can
* set CONFIG_HAVE_ARCH_BOOTMEM and define a wrapper around
* __alloc_bootmem_core().
*/
#ifndef CONFIG_HAVE_ARCH_BOOTMEM
#define alloc_bootmem_core(bdata, size, align, goal, limit) \
__alloc_bootmem_core((bdata), (size), (align), (goal), (limit))
#endif
static int __init bootmem_debug_setup(char *buf)
{
bootmem_debug = 1;
......@@ -382,7 +392,6 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
}
#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
/**
* reserve_bootmem - mark a page range as usable
* @addr: starting address of the range
......@@ -403,7 +412,6 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size,
return mark_bootmem(start, end, 1, flags);
}
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
static unsigned long align_idx(struct bootmem_data *bdata, unsigned long idx,
unsigned long step)
......@@ -428,7 +436,7 @@ static unsigned long align_off(struct bootmem_data *bdata, unsigned long off,
return ALIGN(base + off, align) - base;
}
static void * __init alloc_bootmem_core(struct bootmem_data *bdata,
static void * __init __alloc_bootmem_core(struct bootmem_data *bdata,
unsigned long size, unsigned long align,
unsigned long goal, unsigned long limit)
{
......
......@@ -24,6 +24,7 @@
#include <linux/radix-tree.h>
#include <linux/rcupdate.h>
#include <linux/bootmem.h>
#include <linux/pfn.h>
#include <asm/atomic.h>
#include <asm/uaccess.h>
......@@ -152,8 +153,8 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
*
* Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N]
*/
static int vmap_page_range(unsigned long start, unsigned long end,
pgprot_t prot, struct page **pages)
static int vmap_page_range_noflush(unsigned long start, unsigned long end,
pgprot_t prot, struct page **pages)
{
pgd_t *pgd;
unsigned long next;
......@@ -169,13 +170,22 @@ static int vmap_page_range(unsigned long start, unsigned long end,
if (err)
break;
} while (pgd++, addr = next, addr != end);
flush_cache_vmap(start, end);
if (unlikely(err))
return err;
return nr;
}
static int vmap_page_range(unsigned long start, unsigned long end,
pgprot_t prot, struct page **pages)
{
int ret;
ret = vmap_page_range_noflush(start, end, prot, pages);
flush_cache_vmap(start, end);
return ret;
}
static inline int is_vmalloc_or_module_addr(const void *x)
{
/*
......@@ -982,6 +992,32 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
}
EXPORT_SYMBOL(vm_map_ram);
/**
* vm_area_register_early - register vmap area early during boot
* @vm: vm_struct to register
* @align: requested alignment
*
* This function is used to register kernel vm area before
* vmalloc_init() is called. @vm->size and @vm->flags should contain
* proper values on entry and other fields should be zero. On return,
* vm->addr contains the allocated address.
*
* DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING.
*/
void __init vm_area_register_early(struct vm_struct *vm, size_t align)
{
static size_t vm_init_off __initdata;
unsigned long addr;
addr = ALIGN(VMALLOC_START + vm_init_off, align);
vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START;
vm->addr = (void *)addr;
vm->next = vmlist;
vmlist = vm;
}
void __init vmalloc_init(void)
{
struct vmap_area *va;
......@@ -1009,6 +1045,58 @@ void __init vmalloc_init(void)
vmap_initialized = true;
}
/**
* map_kernel_range_noflush - map kernel VM area with the specified pages
* @addr: start of the VM area to map
* @size: size of the VM area to map
* @prot: page protection flags to use
* @pages: pages to map
*
* Map PFN_UP(@size) pages at @addr. The VM area @addr and @size
* specify should have been allocated using get_vm_area() and its
* friends.
*
* NOTE:
* This function does NOT do any cache flushing. The caller is
* responsible for calling flush_cache_vmap() on to-be-mapped areas
* before calling this function.
*
* RETURNS:
* The number of pages mapped on success, -errno on failure.
*/
int map_kernel_range_noflush(unsigned long addr, unsigned long size,
pgprot_t prot, struct page **pages)
{
return vmap_page_range_noflush(addr, addr + size, prot, pages);
}
/**
* unmap_kernel_range_noflush - unmap kernel VM area
* @addr: start of the VM area to unmap
* @size: size of the VM area to unmap
*
* Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size
* specify should have been allocated using get_vm_area() and its
* friends.
*
* NOTE:
* This function does NOT do any cache flushing. The caller is
* responsible for calling flush_cache_vunmap() on to-be-mapped areas
* before calling this function and flush_tlb_kernel_range() after.
*/
void unmap_kernel_range_noflush(unsigned long addr, unsigned long size)
{
vunmap_page_range(addr, addr + size);
}
/**
* unmap_kernel_range - unmap kernel VM area and flush cache and TLB
* @addr: start of the VM area to unmap
* @size: size of the VM area to unmap
*
* Similar to unmap_kernel_range_noflush() but flushes vcache before
* the unmapping and tlb after.
*/
void unmap_kernel_range(unsigned long addr, unsigned long size)
{
unsigned long end = addr + size;
......
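map_kernel_range_noflush()/unmap_kernel_range_noflush() expose raw PTE installation and removal for an already-reserved kernel VM area, leaving all cache and TLB flushing to the caller; the dynamic per-cpu allocator is their intended user. A hedged sketch of the calling convention described in the kerneldoc above, with a hypothetical area and page array:

#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

/* Map nr_pages pages into a region previously reserved with
 * get_vm_area() or vm_area_register_early().  With the *_noflush
 * variants, flushing is the caller's responsibility. */
static int map_reserved_area(unsigned long addr, struct page **pages,
			     int nr_pages)
{
	unsigned long size = (unsigned long)nr_pages << PAGE_SHIFT;
	int ret;

	flush_cache_vmap(addr, addr + size);	/* before mapping, per the doc */
	ret = map_kernel_range_noflush(addr, size, PAGE_KERNEL, pages);
	return ret < 0 ? ret : 0;
}

static void unmap_reserved_area(unsigned long addr, int nr_pages)
{
	unsigned long size = (unsigned long)nr_pages << PAGE_SHIFT;

	flush_cache_vunmap(addr, addr + size);
	unmap_kernel_range_noflush(addr, size);
	flush_tlb_kernel_range(addr, addr + size);
}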
......@@ -1375,10 +1375,10 @@ EXPORT_SYMBOL_GPL(snmp_fold_field);
int snmp_mib_init(void *ptr[2], size_t mibsize)
{
BUG_ON(ptr == NULL);
ptr[0] = __alloc_percpu(mibsize);
ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
if (!ptr[0])
goto err0;
ptr[1] = __alloc_percpu(mibsize);
ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
if (!ptr[1])
goto err1;
return 0;
......