Commit 9b80b11c authored by Linus Torvalds's avatar Linus Torvalds
parents b0b7b8ea 271c3f35
......@@ -581,17 +581,12 @@ config ARCH_FLATMEM_ENABLE
def_bool y
depends on PPC64 && !NUMA
config ARCH_DISCONTIGMEM_ENABLE
def_bool y
depends on SMP && PPC_PSERIES
config ARCH_DISCONTIGMEM_DEFAULT
config ARCH_SPARSEMEM_ENABLE
def_bool y
depends on ARCH_DISCONTIGMEM_ENABLE
config ARCH_SPARSEMEM_ENABLE
config ARCH_SPARSEMEM_DEFAULT
def_bool y
depends on ARCH_DISCONTIGMEM_ENABLE
depends on SMP && PPC_PSERIES
source "mm/Kconfig"
......
......@@ -12,11 +12,13 @@ CFLAGS_btext.o += -fPIC
endif
obj-y := semaphore.o cputable.o ptrace.o syscalls.o \
irq.o signal_32.o pmc.o
irq.o signal_32.o pmc.o vdso.o
obj-y += vdso32/
obj-$(CONFIG_PPC64) += setup_64.o binfmt_elf32.o sys_ppc32.o \
signal_64.o ptrace32.o systbl.o \
paca.o ioctl32.o cpu_setup_power4.o \
firmware.o sysfs.o
firmware.o sysfs.o udbg.o
obj-$(CONFIG_PPC64) += vdso64/
obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o
obj-$(CONFIG_POWER4) += idle_power4.o
obj-$(CONFIG_PPC_OF) += of_device.o
......@@ -29,6 +31,10 @@ obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
obj-$(CONFIG_LPARCFG) += lparcfg.o
obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
obj-$(CONFIG_PPC_PSERIES) += udbg_16550.o
obj-$(CONFIG_PPC_MAPLE) += udbg_16550.o
udbgscc-$(CONFIG_PPC64) := udbg_scc.o
obj-$(CONFIG_PPC_PMAC) += $(udbgscc-y)
ifeq ($(CONFIG_PPC_MERGE),y)
......
......@@ -37,12 +37,12 @@
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/rtas.h>
#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#include <asm/lppaca.h>
#include <asm/iseries/hv_lp_event.h>
#include <asm/cache.h>
#include <asm/systemcfg.h>
#include <asm/compat.h>
#endif
......@@ -251,25 +251,42 @@ int main(void)
DEFINE(TASK_SIZE, TASK_SIZE);
DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28);
#else /* CONFIG_PPC64 */
/* systemcfg offsets for use by vdso */
DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct systemcfg, tb_orig_stamp));
DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct systemcfg, tb_ticks_per_sec));
DEFINE(CFG_TB_TO_XS, offsetof(struct systemcfg, tb_to_xs));
DEFINE(CFG_STAMP_XSEC, offsetof(struct systemcfg, stamp_xsec));
DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct systemcfg, tb_update_count));
DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct systemcfg, tz_minuteswest));
DEFINE(CFG_TZ_DSTTIME, offsetof(struct systemcfg, tz_dsttime));
DEFINE(CFG_SYSCALL_MAP32, offsetof(struct systemcfg, syscall_map_32));
DEFINE(CFG_SYSCALL_MAP64, offsetof(struct systemcfg, syscall_map_64));
#endif /* ! CONFIG_PPC64 */
/* timeval/timezone offsets for use by vdso */
/* datapage offsets for use by vdso */
DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp));
DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec));
DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs));
DEFINE(CFG_STAMP_XSEC, offsetof(struct vdso_data, stamp_xsec));
DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count));
DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest));
DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32));
DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));
DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
#ifdef CONFIG_PPC64
DEFINE(CFG_SYSCALL_MAP64, offsetof(struct vdso_data, syscall_map_64));
DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec));
DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec));
DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec));
DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec));
DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec));
DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec));
#else
DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec));
DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec));
DEFINE(TSPEC32_TV_SEC, offsetof(struct timespec, tv_sec));
DEFINE(TSPEC32_TV_NSEC, offsetof(struct timespec, tv_nsec));
#endif
/* timeval/timezone offsets for use by vdso */
DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
#endif /* CONFIG_PPC64 */
/* Other bits used by the vdso */
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
DEFINE(CLOCK_REALTIME_RES, TICK_NSEC);
return 0;
}
......@@ -1100,6 +1100,7 @@ start_here:
mr r3,r31
mr r4,r30
bl machine_init
bl __save_cpu_setup
bl MMU_init
#ifdef CONFIG_APUS
......
......@@ -35,7 +35,7 @@
#include <asm/time.h>
#include <asm/iseries/it_exp_vpd_panel.h>
#include <asm/prom.h>
#include <asm/systemcfg.h>
#include <asm/vdso_datapage.h>
#define MODULE_VERS "1.6"
#define MODULE_NAME "lparcfg"
......@@ -43,7 +43,7 @@
/* #define LPARCFG_DEBUG */
/* find a better place for this function... */
void log_plpar_hcall_return(unsigned long rc, char *tag)
static void log_plpar_hcall_return(unsigned long rc, char *tag)
{
if (rc == 0) /* success, return */
return;
......@@ -213,11 +213,10 @@ static void h_pic(unsigned long *pool_idle_time, unsigned long *num_procs)
unsigned long dummy;
rc = plpar_hcall(H_PIC, 0, 0, 0, 0, pool_idle_time, num_procs, &dummy);
if (rc != H_Authority)
log_plpar_hcall_return(rc, "H_PIC");
}
static unsigned long get_purr(void);
/* Track sum of all purrs across all processors. This is used to further */
/* calculate usage values by different applications */
......@@ -319,8 +318,6 @@ static void parse_system_parameter_string(struct seq_file *m)
kfree(local_buffer);
}
static int lparcfg_count_active_processors(void);
/* Return the number of processors in the system.
* This function reads through the device tree and counts
* the virtual processors, this does not include threads.
......@@ -372,7 +369,7 @@ static int lparcfg_data(struct seq_file *m, void *v)
lrdrp = (int *)get_property(rtas_node, "ibm,lrdr-capacity", NULL);
if (lrdrp == NULL) {
partition_potential_processors = _systemcfg->processorCount;
partition_potential_processors = vdso_data->processorCount;
} else {
partition_potential_processors = *(lrdrp + 4);
}
......@@ -548,7 +545,7 @@ static ssize_t lparcfg_write(struct file *file, const char __user * buf,
retval = -EIO;
}
out:
out:
kfree(kbuf);
return retval;
}
......
......@@ -15,17 +15,10 @@
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/page.h>
#include <asm/systemcfg.h>
#include <asm/lppaca.h>
#include <asm/iseries/it_lp_queue.h>
#include <asm/paca.h>
static union {
struct systemcfg data;
u8 page[PAGE_SIZE];
} systemcfg_store __attribute__((__section__(".data.page.aligned")));
struct systemcfg *_systemcfg = &systemcfg_store.data;
/* This symbol is provided by the linker - let it fill in the paca
* field correctly */
......
......@@ -23,7 +23,7 @@
#include <linux/slab.h>
#include <linux/kernel.h>
#include <asm/systemcfg.h>
#include <asm/vdso_datapage.h>
#include <asm/rtas.h>
#include <asm/uaccess.h>
#include <asm/prom.h>
......@@ -72,7 +72,7 @@ static int __init proc_ppc64_init(void)
if (!pde)
return 1;
pde->nlink = 1;
pde->data = _systemcfg;
pde->data = vdso_data;
pde->size = PAGE_SIZE;
pde->proc_fops = &page_map_fops;
......
......@@ -32,7 +32,6 @@
#include <asm/rtas.h>
#include <asm/machdep.h> /* for ppc_md */
#include <asm/time.h>
#include <asm/systemcfg.h>
/* Token for Sensors */
#define KEY_SWITCH 0x0001
......
......@@ -33,7 +33,7 @@
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/processor.h>
#include <asm/systemcfg.h>
#include <asm/vdso_datapage.h>
#include <asm/pgtable.h>
#include <asm/smp.h>
#include <asm/elf.h>
......@@ -444,10 +444,8 @@ void __init check_for_initrd(void)
if (initrd_start >= KERNELBASE && initrd_end >= KERNELBASE &&
initrd_end > initrd_start)
ROOT_DEV = Root_RAM0;
else {
printk("Bogus initrd %08lx %08lx\n", initrd_start, initrd_end);
else
initrd_start = initrd_end = 0;
}
if (initrd_start)
printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
......@@ -566,7 +564,7 @@ void __init smp_setup_cpu_maps(void)
cpu_set(cpu ^ 0x1, cpu_sibling_map[cpu]);
}
_systemcfg->processorCount = num_present_cpus();
vdso_data->processorCount = num_present_cpus();
#endif /* CONFIG_PPC64 */
}
#endif /* CONFIG_SMP */
......
......@@ -57,7 +57,6 @@
#include <asm/lmb.h>
#include <asm/iseries/it_lp_naca.h>
#include <asm/firmware.h>
#include <asm/systemcfg.h>
#include <asm/xmon.h>
#include <asm/udbg.h>
......@@ -375,8 +374,7 @@ static void __init initialize_cache_info(void)
DBG("Argh, can't find dcache properties ! "
"sizep: %p, lsizep: %p\n", sizep, lsizep);
_systemcfg->dcache_size = ppc64_caches.dsize = size;
_systemcfg->dcache_line_size =
ppc64_caches.dsize = size;
ppc64_caches.dline_size = lsize;
ppc64_caches.log_dline_size = __ilog2(lsize);
ppc64_caches.dlines_per_page = PAGE_SIZE / lsize;
......@@ -393,22 +391,13 @@ static void __init initialize_cache_info(void)
DBG("Argh, can't find icache properties ! "
"sizep: %p, lsizep: %p\n", sizep, lsizep);
_systemcfg->icache_size = ppc64_caches.isize = size;
_systemcfg->icache_line_size =
ppc64_caches.isize = size;
ppc64_caches.iline_size = lsize;
ppc64_caches.log_iline_size = __ilog2(lsize);
ppc64_caches.ilines_per_page = PAGE_SIZE / lsize;
}
}
/* Add an eye catcher and the systemcfg layout version number */
strcpy(_systemcfg->eye_catcher, "SYSTEMCFG:PPC64");
_systemcfg->version.major = SYSTEMCFG_MAJOR;
_systemcfg->version.minor = SYSTEMCFG_MINOR;
_systemcfg->processor = mfspr(SPRN_PVR);
_systemcfg->platform = _machine;
_systemcfg->physicalMemorySize = lmb_phys_mem_size();
DBG(" <- initialize_cache_info()\n");
}
......@@ -495,11 +484,10 @@ void __init setup_system(void)
printk("-----------------------------------------------------\n");
printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size);
printk("ppc64_interrupt_controller = 0x%ld\n", ppc64_interrupt_controller);
printk("systemcfg = 0x%p\n", _systemcfg);
printk("systemcfg->platform = 0x%x\n", _systemcfg->platform);
printk("systemcfg->processorCount = 0x%lx\n", _systemcfg->processorCount);
printk("systemcfg->physicalMemorySize = 0x%lx\n", _systemcfg->physicalMemorySize);
printk("ppc64_interrupt_controller = 0x%ld\n",
ppc64_interrupt_controller);
printk("platform = 0x%x\n", _machine);
printk("physicalMemorySize = 0x%lx\n", lmb_phys_mem_size());
printk("ppc64_caches.dcache_line_size = 0x%x\n",
ppc64_caches.dline_size);
printk("ppc64_caches.icache_line_size = 0x%x\n",
......@@ -567,33 +555,6 @@ static void __init emergency_stack_init(void)
__va(lmb_alloc_base(HW_PAGE_SIZE, 128, limit)) + HW_PAGE_SIZE;
}
/*
* Called from setup_arch to initialize the bitmap of available
* syscalls in the systemcfg page
*/
void __init setup_syscall_map(void)
{
unsigned int i, count64 = 0, count32 = 0;
extern unsigned long *sys_call_table;
extern unsigned long sys_ni_syscall;
for (i = 0; i < __NR_syscalls; i++) {
if (sys_call_table[i*2] != sys_ni_syscall) {
count64++;
_systemcfg->syscall_map_64[i >> 5] |=
0x80000000UL >> (i & 0x1f);
}
if (sys_call_table[i*2+1] != sys_ni_syscall) {
count32++;
_systemcfg->syscall_map_32[i >> 5] |=
0x80000000UL >> (i & 0x1f);
}
}
printk(KERN_INFO "Syscall map setup, %d 32-bit and %d 64-bit syscalls\n",
count32, count64);
}
/*
* Called into from start_kernel, after lock_kernel has been called.
* Initializes bootmem, which is unsed to manage page allocation until
......@@ -635,9 +596,6 @@ void __init setup_arch(char **cmdline_p)
do_init_bootmem();
sparse_init();
/* initialize the syscall map in systemcfg */
setup_syscall_map();
#ifdef CONFIG_DUMMY_CONSOLE
conswitchp = &dummy_con;
#endif
......
......@@ -43,10 +43,10 @@
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/sigcontext.h>
#include <asm/vdso.h>
#ifdef CONFIG_PPC64
#include "ppc32.h"
#include <asm/unistd.h>
#include <asm/vdso.h>
#else
#include <asm/ucontext.h>
#include <asm/pgtable.h>
......@@ -809,14 +809,11 @@ static int handle_rt_signal(unsigned long sig, struct k_sigaction *ka,
/* Save user registers on the stack */
frame = &rt_sf->uc.uc_mcontext;
#ifdef CONFIG_PPC64
if (vdso32_rt_sigtramp && current->thread.vdso_base) {
if (save_user_regs(regs, frame, 0))
goto badframe;
regs->link = current->thread.vdso_base + vdso32_rt_sigtramp;
} else
#endif
{
} else {
if (save_user_regs(regs, frame, __NR_rt_sigreturn))
goto badframe;
regs->link = (unsigned long) frame->tramp;
......@@ -1090,14 +1087,11 @@ static int handle_signal(unsigned long sig, struct k_sigaction *ka,
|| __put_user(sig, &sc->signal))
goto badframe;
#ifdef CONFIG_PPC64
if (vdso32_sigtramp && current->thread.vdso_base) {
if (save_user_regs(regs, &frame->mctx, 0))
goto badframe;
regs->link = current->thread.vdso_base + vdso32_sigtramp;
} else
#endif
{
} else {
if (save_user_regs(regs, &frame->mctx, __NR_sigreturn))
goto badframe;
regs->link = (unsigned long) frame->mctx.tramp;
......
......@@ -44,7 +44,7 @@
#include <asm/cputable.h>
#include <asm/system.h>
#include <asm/mpic.h>
#include <asm/systemcfg.h>
#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#endif
......@@ -371,7 +371,7 @@ int generic_cpu_disable(void)
cpu_clear(cpu, cpu_online_map);
#ifdef CONFIG_PPC64
_systemcfg->processorCount--;
vdso_data->processorCount--;
fixup_irqs(cpu_online_map);
#endif
return 0;
......
......@@ -16,7 +16,6 @@
#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/prom.h>
#include <asm/systemcfg.h>
#include <asm/paca.h>
#include <asm/lppaca.h>
#include <asm/machdep.h>
......
......@@ -62,8 +62,8 @@
#include <asm/irq.h>
#include <asm/div64.h>
#include <asm/smp.h>
#include <asm/vdso_datapage.h>
#ifdef CONFIG_PPC64
#include <asm/systemcfg.h>
#include <asm/firmware.h>
#endif
#ifdef CONFIG_PPC_ISERIES
......@@ -261,7 +261,6 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
do_gtod.varp = temp_varp;
do_gtod.var_idx = temp_idx;
#ifdef CONFIG_PPC64
/*
* tb_update_count is used to allow the userspace gettimeofday code
* to assure itself that it sees a consistent view of the tb_to_xs and
......@@ -271,14 +270,15 @@ static inline void update_gtod(u64 new_tb_stamp, u64 new_stamp_xsec,
* tb_to_xs and stamp_xsec values are consistent. If not, then it
* loops back and reads them again until this criteria is met.
*/
++(_systemcfg->tb_update_count);
++(vdso_data->tb_update_count);
smp_wmb();
_systemcfg->tb_orig_stamp = new_tb_stamp;
_systemcfg->stamp_xsec = new_stamp_xsec;
_systemcfg->tb_to_xs = new_tb_to_xs;
vdso_data->tb_orig_stamp = new_tb_stamp;
vdso_data->stamp_xsec = new_stamp_xsec;
vdso_data->tb_to_xs = new_tb_to_xs;
vdso_data->wtom_clock_sec = wall_to_monotonic.tv_sec;
vdso_data->wtom_clock_nsec = wall_to_monotonic.tv_nsec;
smp_wmb();
++(_systemcfg->tb_update_count);
#endif
++(vdso_data->tb_update_count);
}
/*
......@@ -357,9 +357,8 @@ static void iSeries_tb_recal(void)
do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
tb_to_xs = divres.result_low;
do_gtod.varp->tb_to_xs = tb_to_xs;
_systemcfg->tb_ticks_per_sec =
tb_ticks_per_sec;
_systemcfg->tb_to_xs = tb_to_xs;
vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
vdso_data->tb_to_xs = tb_to_xs;
}
else {
printk( "Titan recalibrate: FAILED (difference > 4 percent)\n"
......@@ -561,10 +560,8 @@ int do_settimeofday(struct timespec *tv)
new_xsec += (u64)new_sec * XSEC_PER_SEC - tb_delta_xs;
update_gtod(tb_last_jiffy, new_xsec, do_gtod.varp->tb_to_xs);
#ifdef CONFIG_PPC64
_systemcfg->tz_minuteswest = sys_tz.tz_minuteswest;
_systemcfg->tz_dsttime = sys_tz.tz_dsttime;
#endif
vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
vdso_data->tz_dsttime = sys_tz.tz_dsttime;
write_sequnlock_irqrestore(&xtime_lock, flags);
clock_was_set();
......@@ -713,13 +710,12 @@ void __init time_init(void)
do_gtod.tb_ticks_per_sec = tb_ticks_per_sec;
do_gtod.varp->tb_to_xs = tb_to_xs;
do_gtod.tb_to_us = tb_to_us;
#ifdef CONFIG_PPC64
_systemcfg->tb_orig_stamp = tb_last_jiffy;
_systemcfg->tb_update_count = 0;
_systemcfg->tb_ticks_per_sec = tb_ticks_per_sec;
_systemcfg->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC;
_systemcfg->tb_to_xs = tb_to_xs;
#endif
vdso_data->tb_orig_stamp = tb_last_jiffy;
vdso_data->tb_update_count = 0;
vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
vdso_data->stamp_xsec = xtime.tv_sec * XSEC_PER_SEC;
vdso_data->tb_to_xs = tb_to_xs;
time_freq = 0;
......
......@@ -49,7 +49,6 @@
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#include <asm/processor.h>
#include <asm/systemcfg.h>
#endif
#ifdef CONFIG_PPC64 /* XXX */
......
/*
* linux/arch/ppc64/kernel/vdso.c
*
* Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
* <benh@kernel.crashing.org>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/elf.h>
#include <linux/security.h>
#include <linux/bootmem.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/lmb.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/vdso.h>
#include <asm/vdso_datapage.h>
#undef DEBUG
#ifdef DEBUG
#define DBG(fmt...) printk(fmt)
#else
#define DBG(fmt...)
#endif
/* Max supported size for symbol names */
#define MAX_SYMNAME 64
extern char vdso32_start, vdso32_end;
static void *vdso32_kbase = &vdso32_start;
unsigned int vdso32_pages;
unsigned long vdso32_sigtramp;
unsigned long vdso32_rt_sigtramp;
#ifdef CONFIG_PPC64
extern char vdso64_start, vdso64_end;
static void *vdso64_kbase = &vdso64_start;
unsigned int vdso64_pages;
unsigned long vdso64_rt_sigtramp;
#endif /* CONFIG_PPC64 */
/*
* The vdso data page (aka. systemcfg for old ppc64 fans) is here.
* Once the early boot kernel code no longer needs to muck around
* with it, it will become dynamically allocated
*/
static union {
struct vdso_data data;
u8 page[PAGE_SIZE];
} vdso_data_store __attribute__((__section__(".data.page_aligned")));
struct vdso_data *vdso_data = &vdso_data_store.data;
/* Format of the patch table */
struct vdso_patch_def
{
unsigned long ftr_mask, ftr_value;
const char *gen_name;
const char *fix_name;
};
/* Table of functions to patch based on the CPU type/revision
*
* Currently, we only change sync_dicache to do nothing on processors
* with a coherent icache
*/
static struct vdso_patch_def vdso_patches[] = {
{
CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
"__kernel_sync_dicache", "__kernel_sync_dicache_p5"
},
{
CPU_FTR_USE_TB, 0,
"__kernel_gettimeofday", NULL
},
};
/*
* Some infos carried around for each of them during parsing at
* boot time.
*/
struct lib32_elfinfo
{
Elf32_Ehdr *hdr; /* ptr to ELF */
Elf32_Sym *dynsym; /* ptr to .dynsym section */
unsigned long dynsymsize; /* size of .dynsym section */
char *dynstr; /* ptr to .dynstr section */
unsigned long text; /* offset of .text section in .so */
};
struct lib64_elfinfo
{
Elf64_Ehdr *hdr;
Elf64_Sym *dynsym;
unsigned long dynsymsize;
char *dynstr;
unsigned long text;
};
#ifdef __DEBUG
static void dump_one_vdso_page(struct page *pg, struct page *upg)
{
printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT),
page_count(pg),
pg->flags);
if (upg/* && pg != upg*/) {
printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg)
<< PAGE_SHIFT),
page_count(upg),
upg->flags);
}
printk("\n");
}
static void dump_vdso_pages(struct vm_area_struct * vma)
{
int i;
if (!vma || test_thread_flag(TIF_32BIT)) {
printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase);
for (i=0; i<vdso32_pages; i++) {
struct page *pg = virt_to_page(vdso32_kbase +
i*PAGE_SIZE);
struct page *upg = (vma && vma->vm_mm) ?
follow_page(vma->vm_mm, vma->vm_start +
i*PAGE_SIZE, 0)
: NULL;
dump_one_vdso_page(pg, upg);
}
}
if (!vma || !test_thread_flag(TIF_32BIT)) {
printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase);
for (i=0; i<vdso64_pages; i++) {
struct page *pg = virt_to_page(vdso64_kbase +
i*PAGE_SIZE);
struct page *upg = (vma && vma->vm_mm) ?
follow_page(vma->vm_mm, vma->vm_start +
i*PAGE_SIZE, 0)
: NULL;
dump_one_vdso_page(pg, upg);
}
}
}
#endif /* DEBUG */
/*
* Keep a dummy vma_close for now, it will prevent VMA merging.
*/
static void vdso_vma_close(struct vm_area_struct * vma)
{
}
/*
* Our nopage() function, maps in the actual vDSO kernel pages, they will
* be mapped read-only by do_no_page(), and eventually COW'ed, either
* right away for an initial write access, or by do_wp_page().
*/
static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
unsigned long address, int *type)
{
unsigned long offset = address - vma->vm_start;
struct page *pg;
#ifdef CONFIG_PPC64
void *vbase = test_thread_flag(TIF_32BIT) ?
vdso32_kbase : vdso64_kbase;
#else
void *vbase = vdso32_kbase;
#endif
DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n",
current->comm, address, offset);
if (address < vma->vm_start || address > vma->vm_end)
return NOPAGE_SIGBUS;
/*
* Last page is systemcfg.
*/
if ((vma->vm_end - address) <= PAGE_SIZE)
pg = virt_to_page(vdso_data);
else
pg = virt_to_page(vbase + offset);
get_page(pg);
DBG(" ->page count: %d\n", page_count(pg));
return pg;
}
static struct vm_operations_struct vdso_vmops = {
.close = vdso_vma_close,
.nopage = vdso_vma_nopage,
};
/*
* This is called from binfmt_elf, we create the special vma for the
* vDSO and insert it into the mm struct tree
*/
int arch_setup_additional_pages(struct linux_binprm *bprm,
int executable_stack)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long vdso_pages;
unsigned long vdso_base;
#ifdef CONFIG_PPC64
if (test_thread_flag(TIF_32BIT)) {
vdso_pages = vdso32_pages;
vdso_base = VDSO32_MBASE;
} else {
vdso_pages = vdso64_pages;
vdso_base = VDSO64_MBASE;
}
#else
vdso_pages = vdso32_pages;
vdso_base = VDSO32_MBASE;
#endif
current->thread.vdso_base = 0;
/* vDSO has a problem and was disabled, just don't "enable" it for the
* process
*/
if (vdso_pages == 0)
return 0;
vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (vma == NULL)
return -ENOMEM;
memset(vma, 0, sizeof(*vma));
/* Add a page to the vdso size for the data page */
vdso_pages ++;
/*
* pick a base address for the vDSO in process space. We try to put it
* at vdso_base which is the "natural" base for it, but we might fail
* and end up putting it elsewhere.
*/
vdso_base = get_unmapped_area(NULL, vdso_base,
vdso_pages << PAGE_SHIFT, 0, 0);
if (vdso_base & ~PAGE_MASK) {
kmem_cache_free(vm_area_cachep, vma);
return (int)vdso_base;
}
current->thread.vdso_base = vdso_base;
vma->vm_mm = mm;
vma->vm_start = current->thread.vdso_base;
vma->vm_end = vma->vm_start + (vdso_pages << PAGE_SHIFT);
/*
* our vma flags don't have VM_WRITE so by default, the process isn't
* allowed to write those pages.
* gdb can break that with ptrace interface, and thus trigger COW on
* those pages but it's then your responsibility to never do that on
* the "data" page of the vDSO or you'll stop getting kernel updates
* and your nice userland gettimeofday will be totally dead.
* It's fine to use that for setting breakpoints in the vDSO code
* pages though
*/
vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE |
VM_MAYEXEC | VM_RESERVED;
vma->vm_flags |= mm->def_flags;
vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
vma->vm_ops = &vdso_vmops;
down_write(&mm->mmap_sem);
if (insert_vm_struct(mm, vma)) {
up_write(&mm->mmap_sem);
kmem_cache_free(vm_area_cachep, vma);
return -ENOMEM;
}
mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
up_write(&mm->mmap_sem);
return 0;
}
static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname,
unsigned long *size)
{
Elf32_Shdr *sechdrs;
unsigned int i;
char *secnames;
/* Grab section headers and strings so we can tell who is who */
sechdrs = (void *)ehdr + ehdr->e_shoff;
secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
/* Find the section they want */
for (i = 1; i < ehdr->e_shnum; i++) {
if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
if (size)
*size = sechdrs[i].sh_size;
return (void *)ehdr + sechdrs[i].sh_offset;
}
}
*size = 0;
return NULL;
}
static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib,
const char *symname)
{
unsigned int i;
char name[MAX_SYMNAME], *c;
for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
if (lib->dynsym[i].st_name == 0)
continue;
strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
MAX_SYMNAME);
c = strchr(name, '@');
if (c)
*c = 0;
if (strcmp(symname, name) == 0)
return &lib->dynsym[i];
}
return NULL;
}
/* Note that we assume the section is .text and the symbol is relative to
* the library base
*/
static unsigned long __init find_function32(struct lib32_elfinfo *lib,
const char *symname)
{
Elf32_Sym *sym = find_symbol32(lib, symname);
if (sym == NULL) {
printk(KERN_WARNING "vDSO32: function %s not found !\n",
symname);
return 0;
}
return sym->st_value - VDSO32_LBASE;
}
static int vdso_do_func_patch32(struct lib32_elfinfo *v32,
struct lib64_elfinfo *v64,
const char *orig, const char *fix)
{
Elf32_Sym *sym32_gen, *sym32_fix;
sym32_gen = find_symbol32(v32, orig);
if (sym32_gen == NULL) {
printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig);
return -1;
}
if (fix == NULL) {
sym32_gen->st_name = 0;
return 0;
}
sym32_fix = find_symbol32(v32, fix);
if (sym32_fix == NULL) {
printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix);
return -1;
}
sym32_gen->st_value = sym32_fix->st_value;
sym32_gen->st_size = sym32_fix->st_size;
sym32_gen->st_info = sym32_fix->st_info;
sym32_gen->st_other = sym32_fix->st_other;
sym32_gen->st_shndx = sym32_fix->st_shndx;
return 0;
}
#ifdef CONFIG_PPC64
static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname,
unsigned long *size)
{
Elf64_Shdr *sechdrs;
unsigned int i;
char *secnames;
/* Grab section headers and strings so we can tell who is who */
sechdrs = (void *)ehdr + ehdr->e_shoff;
secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
/* Find the section they want */
for (i = 1; i < ehdr->e_shnum; i++) {
if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
if (size)
*size = sechdrs[i].sh_size;
return (void *)ehdr + sechdrs[i].sh_offset;
}
}
if (size)
*size = 0;
return NULL;
}
static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib,
const char *symname)
{
unsigned int i;
char name[MAX_SYMNAME], *c;
for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) {
if (lib->dynsym[i].st_name == 0)
continue;
strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
MAX_SYMNAME);
c = strchr(name, '@');
if (c)
*c = 0;
if (strcmp(symname, name) == 0)
return &lib->dynsym[i];
}
return NULL;
}
/* Note that we assume the section is .text and the symbol is relative to
* the library base
*/
static unsigned long __init find_function64(struct lib64_elfinfo *lib,
const char *symname)
{
Elf64_Sym *sym = find_symbol64(lib, symname);
if (sym == NULL) {
printk(KERN_WARNING "vDSO64: function %s not found !\n",
symname);
return 0;
}
#ifdef VDS64_HAS_DESCRIPTORS
return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) -
VDSO64_LBASE;
#else
return sym->st_value - VDSO64_LBASE;
#endif
}
static int vdso_do_func_patch64(struct lib32_elfinfo *v32,
struct lib64_elfinfo *v64,
const char *orig, const char *fix)
{
Elf64_Sym *sym64_gen, *sym64_fix;
sym64_gen = find_symbol64(v64, orig);
if (sym64_gen == NULL) {
printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig);
return -1;
}
if (fix == NULL) {
sym64_gen->st_name = 0;
return 0;
}
sym64_fix = find_symbol64(v64, fix);
if (sym64_fix == NULL) {
printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix);
return -1;
}
sym64_gen->st_value = sym64_fix->st_value;
sym64_gen->st_size = sym64_fix->st_size;
sym64_gen->st_info = sym64_fix->st_info;
sym64_gen->st_other = sym64_fix->st_other;
sym64_gen->st_shndx = sym64_fix->st_shndx;
return 0;
}
#endif /* CONFIG_PPC64 */
static __init int vdso_do_find_sections(struct lib32_elfinfo *v32,
struct lib64_elfinfo *v64)
{
void *sect;
/*
* Locate symbol tables & text section
*/
v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize);
v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL);
if (v32->dynsym == NULL || v32->dynstr == NULL) {
printk(KERN_ERR "vDSO32: required symbol section not found\n");
return -1;
}
sect = find_section32(v32->hdr, ".text", NULL);
if (sect == NULL) {
printk(KERN_ERR "vDSO32: the .text section was not found\n");
return -1;
}
v32->text = sect - vdso32_kbase;
#ifdef CONFIG_PPC64
v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize);
v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL);
if (v64->dynsym == NULL || v64->dynstr == NULL) {
printk(KERN_ERR "vDSO64: required symbol section not found\n");
return -1;
}
sect = find_section64(v64->hdr, ".text", NULL);
if (sect == NULL) {
printk(KERN_ERR "vDSO64: the .text section was not found\n");
return -1;
}
v64->text = sect - vdso64_kbase;
#endif /* CONFIG_PPC64 */
return 0;
}
static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32,
struct lib64_elfinfo *v64)
{
/*
* Find signal trampolines
*/
#ifdef CONFIG_PPC64
vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64");
#endif
vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32");
vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32");
}
static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
struct lib64_elfinfo *v64)
{
Elf32_Sym *sym32;
#ifdef CONFIG_PPC64
Elf64_Sym *sym64;
sym64 = find_symbol64(v64, "__kernel_datapage_offset");
if (sym64 == NULL) {
printk(KERN_ERR "vDSO64: Can't find symbol "
"__kernel_datapage_offset !\n");
return -1;
}
*((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) =
(vdso64_pages << PAGE_SHIFT) -
(sym64->st_value - VDSO64_LBASE);
#endif /* CONFIG_PPC64 */
sym32 = find_symbol32(v32, "__kernel_datapage_offset");
if (sym32 == NULL) {
printk(KERN_ERR "vDSO32: Can't find symbol "
"__kernel_datapage_offset !\n");
return -1;
}
*((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) =
(vdso32_pages << PAGE_SHIFT) -
(sym32->st_value - VDSO32_LBASE);
return 0;
}
static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32,
struct lib64_elfinfo *v64)
{
int i;
for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) {
struct vdso_patch_def *patch = &vdso_patches[i];
int match = (cur_cpu_spec->cpu_features & patch->ftr_mask)
== patch->ftr_value;
if (!match)
continue;
DBG("replacing %s with %s...\n", patch->gen_name,
patch->fix_name ? "NONE" : patch->fix_name);
/*
* Patch the 32 bits and 64 bits symbols. Note that we do not
* patch the "." symbol on 64 bits.
* It would be easy to do, but doesn't seem to be necessary,
* patching the OPD symbol is enough.
*/
vdso_do_func_patch32(v32, v64, patch->gen_name,
patch->fix_name);
#ifdef CONFIG_PPC64
vdso_do_func_patch64(v32, v64, patch->gen_name,
patch->fix_name);
#endif /* CONFIG_PPC64 */
}
return 0;
}
static __init int vdso_setup(void)
{
struct lib32_elfinfo v32;
struct lib64_elfinfo v64;
v32.hdr = vdso32_kbase;
#ifdef CONFIG_PPC64
v64.hdr = vdso64_kbase;
#endif
if (vdso_do_find_sections(&v32, &v64))
return -1;
if (vdso_fixup_datapage(&v32, &v64))
return -1;
if (vdso_fixup_alt_funcs(&v32, &v64))
return -1;
vdso_setup_trampolines(&v32, &v64);
return 0;
}
/*
* Called from setup_arch to initialize the bitmap of available
* syscalls in the systemcfg page
*/
static void __init vdso_setup_syscall_map(void)
{
unsigned int i;
extern unsigned long *sys_call_table;
extern unsigned long sys_ni_syscall;
for (i = 0; i < __NR_syscalls; i++) {
#ifdef CONFIG_PPC64
if (sys_call_table[i*2] != sys_ni_syscall)
vdso_data->syscall_map_64[i >> 5] |=
0x80000000UL >> (i & 0x1f);
if (sys_call_table[i*2+1] != sys_ni_syscall)
vdso_data->syscall_map_32[i >> 5] |=
0x80000000UL >> (i & 0x1f);
#else /* CONFIG_PPC64 */
if (sys_call_table[i] != sys_ni_syscall)
vdso_data->syscall_map_32[i >> 5] |=
0x80000000UL >> (i & 0x1f);
#endif /* CONFIG_PPC64 */
}
}
void __init vdso_init(void)
{
int i;
#ifdef CONFIG_PPC64
/*
* Fill up the "systemcfg" stuff for backward compatiblity
*/
strcpy(vdso_data->eye_catcher, "SYSTEMCFG:PPC64");
vdso_data->version.major = SYSTEMCFG_MAJOR;
vdso_data->version.minor = SYSTEMCFG_MINOR;
vdso_data->processor = mfspr(SPRN_PVR);
vdso_data->platform = _machine;
vdso_data->physicalMemorySize = lmb_phys_mem_size();
vdso_data->dcache_size = ppc64_caches.dsize;
vdso_data->dcache_line_size = ppc64_caches.dline_size;
vdso_data->icache_size = ppc64_caches.isize;
vdso_data->icache_line_size = ppc64_caches.iline_size;
/*
* Calculate the size of the 64 bits vDSO
*/
vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages);
#endif /* CONFIG_PPC64 */
/*
* Calculate the size of the 32 bits vDSO
*/
vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages);
/*
* Setup the syscall map in the vDOS
*/
vdso_setup_syscall_map();
/*
* Initialize the vDSO images in memory, that is do necessary
* fixups of vDSO symbols, locate trampolines, etc...
*/
if (vdso_setup()) {
printk(KERN_ERR "vDSO setup failure, not enabled !\n");
vdso32_pages = 0;
#ifdef CONFIG_PPC64
vdso64_pages = 0;
#endif
return;
}
/* Make sure pages are in the correct state */
for (i = 0; i < vdso32_pages; i++) {
struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
ClearPageReserved(pg);
get_page(pg);
}
#ifdef CONFIG_PPC64
for (i = 0; i < vdso64_pages; i++) {
struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
ClearPageReserved(pg);
get_page(pg);
}
#endif /* CONFIG_PPC64 */
get_page(virt_to_page(vdso_data));
}
int in_gate_area_no_task(unsigned long addr)
{
return 0;
}
int in_gate_area(struct task_struct *task, unsigned long addr)
{
return 0;
}
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
return NULL;
}
......@@ -5,6 +5,10 @@ obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o
# Build rules
ifeq ($(CONFIG_PPC32),y)
CROSS32CC := $(CC)
endif
targets := $(obj-vdso32) vdso32.so
obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
......@@ -15,7 +19,7 @@ EXTRA_AFLAGS := -D__VDSO32__ -s
obj-y += vdso32_wrapper.o
extra-y += vdso32.lds
CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
CPPFLAGS_vdso32.lds += -P -C -Upowerpc
# Force dependency (incbin is bad)
$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
......
......@@ -66,3 +66,19 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
blr
.cfi_endproc
V_FUNCTION_END(__kernel_get_syscall_map)
/*
* void unsigned long long __kernel_get_tbfreq(void);
*
* returns the timebase frequency in HZ
*/
V_FUNCTION_BEGIN(__kernel_get_tbfreq)
.cfi_startproc
mflr r12
.cfi_register lr,r12
bl __get_datapage@local
lwz r3,CFG_TB_TICKS_PER_SEC(r3)
lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
mtlr r12
.cfi_endproc
V_FUNCTION_END(__kernel_get_tbfreq)
......@@ -2,7 +2,8 @@
* Userland implementation of gettimeofday() for 32 bits processes in a
* ppc64 kernel for use in the vDSO
*
* Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
* Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org,
* IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
......@@ -61,25 +62,194 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
li r3,0
blr
2: mr r3,r10
2:
mtlr r12
mr r3,r10
mr r4,r11
li r0,__NR_gettimeofday
sc
b 1b
blr
.cfi_endproc
V_FUNCTION_END(__kernel_gettimeofday)
/*
* This is the core of gettimeofday(), it returns the xsec
* Exact prototype of clock_gettime()
*
* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
*
*/
V_FUNCTION_BEGIN(__kernel_clock_gettime)
.cfi_startproc
/* Check for supported clock IDs */
cmpli cr0,r3,CLOCK_REALTIME
cmpli cr1,r3,CLOCK_MONOTONIC
cror cr0,cr0,cr1
bne cr0,99f
mflr r12 /* r12 saves lr */
.cfi_register lr,r12
mr r10,r3 /* r10 saves id */
mr r11,r4 /* r11 saves tp */
bl __get_datapage@local /* get data page */
mr r9, r3 /* datapage ptr in r9 */
beq cr1,50f /* if monotonic -> jump there */
/*
* CLOCK_REALTIME
*/
bl __do_get_xsec@local /* get xsec from tb & kernel */
bne- 98f /* out of line -> do syscall */
/* seconds are xsec >> 20 */
rlwinm r5,r4,12,20,31
rlwimi r5,r3,12,0,19
stw r5,TSPC32_TV_SEC(r11)
/* get remaining xsec and convert to nsec. we scale
* up remaining xsec by 12 bits and get the top 32 bits
* of the multiplication, then we multiply by 1000
*/
rlwinm r5,r4,12,0,19
lis r6,1000000@h
ori r6,r6,1000000@l
mulhwu r5,r5,r6
mulli r5,r5,1000
stw r5,TSPC32_TV_NSEC(r11)
mtlr r12
li r3,0
blr
/*
* CLOCK_MONOTONIC
*/
50: bl __do_get_xsec@local /* get xsec from tb & kernel */
bne- 98f /* out of line -> do syscall */
/* seconds are xsec >> 20 */
rlwinm r6,r4,12,20,31
rlwimi r6,r3,12,0,19
/* get remaining xsec and convert to nsec. we scale
* up remaining xsec by 12 bits and get the top 32 bits
* of the multiplication, then we multiply by 1000
*/
rlwinm r7,r4,12,0,19
lis r5,1000000@h
ori r5,r5,1000000@l
mulhwu r7,r7,r5
mulli r7,r7,1000
/* now we must fixup using wall to monotonic. We need to snapshot
* that value and do the counter trick again. Fortunately, we still
* have the counter value in r8 that was returned by __do_get_xsec.
* At this point, r6,r7 contain our sec/nsec values, r3,r4 and r5
* can be used
*/
lwz r3,WTOM_CLOCK_SEC(r9)
lwz r4,WTOM_CLOCK_NSEC(r9)
/* We now have our result in r3,r4. We create a fake dependency
* on that result and re-check the counter
*/
or r5,r4,r3
xor r0,r5,r5
add r9,r9,r0
#ifdef CONFIG_PPC64
lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9)
#else
lwz r0,(CFG_TB_UPDATE_COUNT)(r9)
#endif
cmpl cr0,r8,r0 /* check if updated */
bne- 50b
/* Calculate and store result. Note that this mimmics the C code,
* which may cause funny results if nsec goes negative... is that
* possible at all ?
*/
add r3,r3,r6
add r4,r4,r7
lis r5,NSEC_PER_SEC@h
ori r5,r5,NSEC_PER_SEC@l
cmpli cr0,r4,r5
blt 1f
subf r4,r5,r4
addi r3,r3,1
1: stw r3,TSPC32_TV_SEC(r11)
stw r4,TSPC32_TV_NSEC(r11)
mtlr r12
li r3,0
blr
/*
* syscall fallback
*/
98:
mtlr r12
mr r3,r10
mr r4,r11
99:
li r0,__NR_clock_gettime
sc
blr
.cfi_endproc
V_FUNCTION_END(__kernel_clock_gettime)
/*
* Exact prototype of clock_getres()
*
* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
*
*/
V_FUNCTION_BEGIN(__kernel_clock_getres)
.cfi_startproc
/* Check for supported clock IDs */
cmpwi cr0,r3,CLOCK_REALTIME
cmpwi cr1,r3,CLOCK_MONOTONIC
cror cr0,cr0,cr1
bne cr0,99f
li r3,0
cmpli cr0,r4,0
beqlr
lis r5,CLOCK_REALTIME_RES@h
ori r5,r5,CLOCK_REALTIME_RES@l
stw r3,TSPC32_TV_SEC(r4)
stw r5,TSPC32_TV_NSEC(r4)
blr
/*
* syscall fallback
*/
99:
li r0,__NR_clock_getres
sc
blr
.cfi_endproc
V_FUNCTION_END(__kernel_clock_getres)
/*
* This is the core of gettimeofday() & friends, it returns the xsec
* value in r3 & r4 and expects the datapage ptr (non clobbered)
* in r9. clobbers r0,r4,r5,r6,r7,r8
*/
* in r9. clobbers r0,r4,r5,r6,r7,r8.
* When returning, r8 contains the counter value that can be reused
* by the monotonic clock implementation
*/
__do_get_xsec:
.cfi_startproc
/* Check for update count & load values. We use the low
* order 32 bits of the update count
*/
#ifdef CONFIG_PPC64
1: lwz r8,(CFG_TB_UPDATE_COUNT+4)(r9)
#else
1: lwz r8,(CFG_TB_UPDATE_COUNT)(r9)
#endif
andi. r0,r8,1 /* pending update ? loop */
bne- 1b
xor r0,r8,r8 /* create dependency */
......@@ -96,9 +266,9 @@ __do_get_xsec:
cmpl cr0,r3,r0
bne- 2b
/* Substract tb orig stamp. If the high part is non-zero, we jump to the
* slow path which call the syscall. If it's ok, then we have our 32 bits
* tb_ticks value in r7
/* Substract tb orig stamp. If the high part is non-zero, we jump to
* the slow path which call the syscall.
* If it's ok, then we have our 32 bits tb_ticks value in r7
*/
subfc r7,r6,r4
subfe. r0,r5,r3
......@@ -123,9 +293,14 @@ __do_get_xsec:
/* We now have our result in r3,r4. We create a fake dependency
* on that result and re-check the counter
*/
xor r0,r4,r4
or r6,r4,r3
xor r0,r6,r6
add r9,r9,r0
#ifdef CONFIG_PPC64
lwz r0,(CFG_TB_UPDATE_COUNT+4)(r9)
#else
lwz r0,(CFG_TB_UPDATE_COUNT)(r9)
#endif
cmpl cr0,r8,r0 /* check if updated */
bne- 1b
......
......@@ -102,9 +102,12 @@ VERSION
{
VDSO_VERSION_STRING {
global:
__kernel_datapage_offset; /* Has to be there for the kernel to find it */
__kernel_datapage_offset; /* Has to be there for the kernel to find */
__kernel_get_syscall_map;
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
__kernel_get_tbfreq;
__kernel_sync_dicache;
__kernel_sync_dicache_p5;
__kernel_sigtramp32;
......
......@@ -6,7 +6,7 @@
.globl vdso32_start, vdso32_end
.balign PAGE_SIZE
vdso32_start:
.incbin "arch/ppc64/kernel/vdso32/vdso32.so"
.incbin "arch/powerpc/kernel/vdso32/vdso32.so"
.balign PAGE_SIZE
vdso32_end:
......
......@@ -66,3 +66,19 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
blr
.cfi_endproc
V_FUNCTION_END(__kernel_get_syscall_map)
/*
* void unsigned long __kernel_get_tbfreq(void);
*
* returns the timebase frequency in HZ
*/
V_FUNCTION_BEGIN(__kernel_get_tbfreq)
.cfi_startproc
mflr r12
.cfi_register lr,r12
bl V_LOCAL_FUNC(__get_datapage)
ld r3,CFG_TB_TICKS_PER_SEC(r3)
mtlr r12
.cfi_endproc
V_FUNCTION_END(__kernel_get_tbfreq)
......@@ -15,6 +15,7 @@
#include <asm/ppc_asm.h>
#include <asm/vdso.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
.text
/*
......@@ -38,7 +39,9 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
std r5,TVAL64_TV_SEC(r11) /* store sec in tv */
subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) / XSEC_PER_SEC */
mulld r0,r0,r7 /* usec = (xsec * USEC_PER_SEC) /
* XSEC_PER_SEC
*/
rldicl r0,r0,44,20
cmpldi cr0,r10,0 /* check if tz is NULL */
std r0,TVAL64_TV_USEC(r11) /* store usec in tv */
......@@ -54,37 +57,185 @@ V_FUNCTION_BEGIN(__kernel_gettimeofday)
V_FUNCTION_END(__kernel_gettimeofday)
/*
* Exact prototype of clock_gettime()
*
* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
*
*/
V_FUNCTION_BEGIN(__kernel_clock_gettime)
.cfi_startproc
/* Check for supported clock IDs */
cmpwi cr0,r3,CLOCK_REALTIME
cmpwi cr1,r3,CLOCK_MONOTONIC
cror cr0,cr0,cr1
bne cr0,99f
mflr r12 /* r12 saves lr */
.cfi_register lr,r12
mr r10,r3 /* r10 saves id */
mr r11,r4 /* r11 saves tp */
bl V_LOCAL_FUNC(__get_datapage) /* get data page */
beq cr1,50f /* if monotonic -> jump there */
/*
* CLOCK_REALTIME
*/
bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
lis r7,0x3b9a /* r7 = 1000000000 = NSEC_PER_SEC */
ori r7,r7,0xca00
rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
std r5,TSPC64_TV_SEC(r11) /* store sec in tv */
subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
mulld r0,r0,r7 /* nsec = (xsec * NSEC_PER_SEC) /
* XSEC_PER_SEC
*/
rldicl r0,r0,44,20
std r0,TSPC64_TV_NSEC(r11) /* store nsec in tp */
mtlr r12
li r3,0
blr
/*
* CLOCK_MONOTONIC
*/
50: bl V_LOCAL_FUNC(__do_get_xsec) /* get xsec from tb & kernel */
lis r7,0x3b9a /* r7 = 1000000000 = NSEC_PER_SEC */
ori r7,r7,0xca00
rldicl r5,r4,44,20 /* r5 = sec = xsec / XSEC_PER_SEC */
rldicr r6,r5,20,43 /* r6 = sec * XSEC_PER_SEC */
subf r0,r6,r4 /* r0 = xsec = (xsec - r6) */
mulld r0,r0,r7 /* nsec = (xsec * NSEC_PER_SEC) /
* XSEC_PER_SEC
*/
rldicl r6,r0,44,20
/* now we must fixup using wall to monotonic. We need to snapshot
* that value and do the counter trick again. Fortunately, we still
* have the counter value in r8 that was returned by __do_get_xsec.
* At this point, r5,r6 contain our sec/nsec values.
* can be used
*/
lwz r4,WTOM_CLOCK_SEC(r9)
lwz r7,WTOM_CLOCK_NSEC(r9)
/* We now have our result in r4,r7. We create a fake dependency
* on that result and re-check the counter
*/
or r9,r4,r7
xor r0,r9,r9
add r3,r3,r0
ld r0,CFG_TB_UPDATE_COUNT(r3)
cmpld cr0,r0,r8 /* check if updated */
bne- 50b
/* Calculate and store result. Note that this mimmics the C code,
* which may cause funny results if nsec goes negative... is that
* possible at all ?
*/
add r4,r4,r5
add r7,r7,r6
lis r9,NSEC_PER_SEC@h
ori r9,r9,NSEC_PER_SEC@l
cmpli cr0,r7,r9
blt 1f
subf r7,r9,r7
addi r4,r4,1
1: std r4,TSPC64_TV_SEC(r11)
std r7,TSPC64_TV_NSEC(r11)
mtlr r12
li r3,0
blr
/*
* syscall fallback
*/
98:
mtlr r12
mr r3,r10
mr r4,r11
99:
li r0,__NR_clock_gettime
sc
blr
.cfi_endproc
V_FUNCTION_END(__kernel_clock_gettime)
/*
* Exact prototype of clock_getres()
*
* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
*
*/
V_FUNCTION_BEGIN(__kernel_clock_getres)
.cfi_startproc
/* Check for supported clock IDs */
cmpwi cr0,r3,CLOCK_REALTIME
cmpwi cr1,r3,CLOCK_MONOTONIC
cror cr0,cr0,cr1
bne cr0,99f
li r3,0
cmpli cr0,r4,0
beqlr
lis r5,CLOCK_REALTIME_RES@h
ori r5,r5,CLOCK_REALTIME_RES@l
std r3,TSPC64_TV_SEC(r4)
std r5,TSPC64_TV_NSEC(r4)
blr
/*
* syscall fallback
*/
99:
li r0,__NR_clock_getres
sc
blr
.cfi_endproc
V_FUNCTION_END(__kernel_clock_getres)
/*
* This is the core of gettimeofday(), it returns the xsec
* value in r4 and expects the datapage ptr (non clobbered)
* in r3. clobbers r0,r4,r5,r6,r7,r8
*/
* When returning, r8 contains the counter value that can be reused
*/
V_FUNCTION_BEGIN(__do_get_xsec)
.cfi_startproc
/* check for update count & load values */
1: ld r7,CFG_TB_UPDATE_COUNT(r3)
1: ld r8,CFG_TB_UPDATE_COUNT(r3)
andi. r0,r4,1 /* pending update ? loop */
bne- 1b
xor r0,r4,r4 /* create dependency */
add r3,r3,r0
/* Get TB & offset it */
mftb r8
mftb r7
ld r9,CFG_TB_ORIG_STAMP(r3)
subf r8,r9,r8
subf r7,r9,r7
/* Scale result */
ld r5,CFG_TB_TO_XS(r3)
mulhdu r8,r8,r5
mulhdu r7,r7,r5
/* Add stamp since epoch */
ld r6,CFG_STAMP_XSEC(r3)
add r4,r6,r8
add r4,r6,r7
xor r0,r4,r4
add r3,r3,r0
ld r0,CFG_TB_UPDATE_COUNT(r3)
cmpld cr0,r0,r7 /* check if updated */
cmpld cr0,r0,r8 /* check if updated */
bne- 1b
blr
.cfi_endproc
......
......@@ -102,9 +102,12 @@ VERSION
{
VDSO_VERSION_STRING {
global:
__kernel_datapage_offset; /* Has to be there for the kernel to find it */
__kernel_datapage_offset; /* Has to be there for the kernel to find */
__kernel_get_syscall_map;
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
__kernel_get_tbfreq;
__kernel_sync_dicache;
__kernel_sync_dicache_p5;
__kernel_sigtramp_rt64;
......
......@@ -6,7 +6,7 @@
.globl vdso64_start, vdso64_end
.balign PAGE_SIZE
vdso64_start:
.incbin "arch/ppc64/kernel/vdso64/vdso64.so"
.incbin "arch/powerpc/kernel/vdso64/vdso64.so"
.balign PAGE_SIZE
vdso64_end:
......
......@@ -46,9 +46,7 @@
#include <asm/prom.h>
#include <asm/lmb.h>
#include <asm/sections.h>
#ifdef CONFIG_PPC64
#include <asm/vdso.h>
#endif
#include "mmu_decl.h"
......@@ -397,10 +395,8 @@ void __init mem_init(void)
mem_init_done = 1;
#ifdef CONFIG_PPC64
/* Initialize the vDSO */
vdso_init();
#endif
}
/*
......
......@@ -17,9 +17,8 @@
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <asm/sparsemem.h>
#include <asm/lmb.h>
#include <asm/machdep.h>
#include <asm/abs_addr.h>
#include <asm/system.h>
#include <asm/smp.h>
......@@ -28,45 +27,113 @@ static int numa_enabled = 1;
static int numa_debug;
#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
#ifdef DEBUG_NUMA
#define ARRAY_INITIALISER -1
#else
#define ARRAY_INITIALISER 0
#endif
int numa_cpu_lookup_table[NR_CPUS] = { [ 0 ... (NR_CPUS - 1)] =
ARRAY_INITIALISER};
char *numa_memory_lookup_table;
int numa_cpu_lookup_table[NR_CPUS];
cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
int nr_cpus_in_node[MAX_NUMNODES] = { [0 ... (MAX_NUMNODES -1)] = 0};
struct pglist_data *node_data[MAX_NUMNODES];
bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
EXPORT_SYMBOL(node_data);
static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES];
static int min_common_depth;
/*
* We need somewhere to store start/span for each node until we have
* We need somewhere to store start/end/node for each region until we have
* allocated the real node_data structures.
*/
#define MAX_REGIONS (MAX_LMB_REGIONS*2)
static struct {
unsigned long node_start_pfn;
unsigned long node_end_pfn;
unsigned long node_present_pages;
} init_node_data[MAX_NUMNODES] __initdata;
unsigned long start_pfn;
unsigned long end_pfn;
int nid;
} init_node_data[MAX_REGIONS] __initdata;
EXPORT_SYMBOL(node_data);
EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(numa_memory_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
EXPORT_SYMBOL(nr_cpus_in_node);
int __init early_pfn_to_nid(unsigned long pfn)
{
unsigned int i;
for (i = 0; init_node_data[i].end_pfn; i++) {
unsigned long start_pfn = init_node_data[i].start_pfn;
unsigned long end_pfn = init_node_data[i].end_pfn;
if ((start_pfn <= pfn) && (pfn < end_pfn))
return init_node_data[i].nid;
}
return -1;
}
void __init add_region(unsigned int nid, unsigned long start_pfn,
unsigned long pages)
{
unsigned int i;
dbg("add_region nid %d start_pfn 0x%lx pages 0x%lx\n",
nid, start_pfn, pages);
for (i = 0; init_node_data[i].end_pfn; i++) {
if (init_node_data[i].nid != nid)
continue;
if (init_node_data[i].end_pfn == start_pfn) {
init_node_data[i].end_pfn += pages;
return;
}
if (init_node_data[i].start_pfn == (start_pfn + pages)) {
init_node_data[i].start_pfn -= pages;
return;
}
}
/*
* Leave last entry NULL so we dont iterate off the end (we use
* entry.end_pfn to terminate the walk).
*/
if (i >= (MAX_REGIONS - 1)) {
printk(KERN_ERR "WARNING: too many memory regions in "
"numa code, truncating\n");
return;
}
init_node_data[i].start_pfn = start_pfn;
init_node_data[i].end_pfn = start_pfn + pages;
init_node_data[i].nid = nid;
}
/* We assume init_node_data has no overlapping regions */
void __init get_region(unsigned int nid, unsigned long *start_pfn,
unsigned long *end_pfn, unsigned long *pages_present)
{
unsigned int i;
*start_pfn = -1UL;
*end_pfn = *pages_present = 0;
for (i = 0; init_node_data[i].end_pfn; i++) {
if (init_node_data[i].nid != nid)
continue;
*pages_present += init_node_data[i].end_pfn -
init_node_data[i].start_pfn;
if (init_node_data[i].start_pfn < *start_pfn)
*start_pfn = init_node_data[i].start_pfn;
if (init_node_data[i].end_pfn > *end_pfn)
*end_pfn = init_node_data[i].end_pfn;
}
/* We didnt find a matching region, return start/end as 0 */
if (*start_pfn == -1UL)
start_pfn = 0;
}
static inline void map_cpu_to_node(int cpu, int node)
{
numa_cpu_lookup_table[cpu] = node;
if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) {
if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node])))
cpu_set(cpu, numa_cpumask_lookup_table[node]);
nr_cpus_in_node[node]++;
}
}
#ifdef CONFIG_HOTPLUG_CPU
......@@ -78,7 +145,6 @@ static void unmap_cpu_from_node(unsigned long cpu)
if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) {
cpu_clear(cpu, numa_cpumask_lookup_table[node]);
nr_cpus_in_node[node]--;
} else {
printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
cpu, node);
......@@ -86,7 +152,7 @@ static void unmap_cpu_from_node(unsigned long cpu)
}
#endif /* CONFIG_HOTPLUG_CPU */
static struct device_node * __devinit find_cpu_node(unsigned int cpu)
static struct device_node *find_cpu_node(unsigned int cpu)
{
unsigned int hw_cpuid = get_hard_smp_processor_id(cpu);
struct device_node *cpu_node = NULL;
......@@ -213,7 +279,7 @@ static int __init get_mem_size_cells(void)
return rc;
}
static unsigned long read_n_cells(int n, unsigned int **buf)
static unsigned long __init read_n_cells(int n, unsigned int **buf)
{
unsigned long result = 0;
......@@ -295,7 +361,8 @@ static int cpu_numa_callback(struct notifier_block *nfb,
* or zero. If the returned value of size is 0 the region should be
* discarded as it lies wholy above the memory limit.
*/
static unsigned long __init numa_enforce_memory_limit(unsigned long start, unsigned long size)
static unsigned long __init numa_enforce_memory_limit(unsigned long start,
unsigned long size)
{
/*
* We use lmb_end_of_DRAM() in here instead of memory_limit because
......@@ -320,8 +387,7 @@ static int __init parse_numa_properties(void)
struct device_node *cpu = NULL;
struct device_node *memory = NULL;
int addr_cells, size_cells;
int max_domain = 0;
long entries = lmb_end_of_DRAM() >> MEMORY_INCREMENT_SHIFT;
int max_domain;
unsigned long i;
if (numa_enabled == 0) {
......@@ -329,13 +395,6 @@ static int __init parse_numa_properties(void)
return -1;
}
numa_memory_lookup_table =
(char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1));
memset(numa_memory_lookup_table, 0, entries * sizeof(char));
for (i = 0; i < entries ; i++)
numa_memory_lookup_table[i] = ARRAY_INITIALISER;
min_common_depth = find_min_common_depth();
dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
......@@ -387,9 +446,6 @@ static int __init parse_numa_properties(void)
start = read_n_cells(addr_cells, &memcell_buf);
size = read_n_cells(size_cells, &memcell_buf);
start = _ALIGN_DOWN(start, MEMORY_INCREMENT);
size = _ALIGN_UP(size, MEMORY_INCREMENT);
numa_domain = of_node_numa_domain(memory);
if (numa_domain >= MAX_NUMNODES) {
......@@ -403,44 +459,15 @@ static int __init parse_numa_properties(void)
if (max_domain < numa_domain)
max_domain = numa_domain;
if (! (size = numa_enforce_memory_limit(start, size))) {
if (!(size = numa_enforce_memory_limit(start, size))) {
if (--ranges)
goto new_range;
else
continue;
}
/*
* Initialize new node struct, or add to an existing one.
*/
if (init_node_data[numa_domain].node_end_pfn) {
if ((start / PAGE_SIZE) <
init_node_data[numa_domain].node_start_pfn)
init_node_data[numa_domain].node_start_pfn =
start / PAGE_SIZE;
if (((start / PAGE_SIZE) + (size / PAGE_SIZE)) >
init_node_data[numa_domain].node_end_pfn)
init_node_data[numa_domain].node_end_pfn =
(start / PAGE_SIZE) +
(size / PAGE_SIZE);
init_node_data[numa_domain].node_present_pages +=
size / PAGE_SIZE;
} else {
node_set_online(numa_domain);
init_node_data[numa_domain].node_start_pfn =
start / PAGE_SIZE;
init_node_data[numa_domain].node_end_pfn =
init_node_data[numa_domain].node_start_pfn +
size / PAGE_SIZE;
init_node_data[numa_domain].node_present_pages =
size / PAGE_SIZE;
}
for (i = start ; i < (start+size); i += MEMORY_INCREMENT)
numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] =
numa_domain;
add_region(numa_domain, start >> PAGE_SHIFT,
size >> PAGE_SHIFT);
if (--ranges)
goto new_range;
......@@ -456,32 +483,15 @@ static void __init setup_nonnuma(void)
{
unsigned long top_of_ram = lmb_end_of_DRAM();
unsigned long total_ram = lmb_phys_mem_size();
unsigned long i;
printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
top_of_ram, total_ram);
printk(KERN_INFO "Memory hole size: %ldMB\n",
(top_of_ram - total_ram) >> 20);
if (!numa_memory_lookup_table) {
long entries = top_of_ram >> MEMORY_INCREMENT_SHIFT;
numa_memory_lookup_table =
(char *)abs_to_virt(lmb_alloc(entries * sizeof(char), 1));
memset(numa_memory_lookup_table, 0, entries * sizeof(char));
for (i = 0; i < entries ; i++)
numa_memory_lookup_table[i] = ARRAY_INITIALISER;
}
map_cpu_to_node(boot_cpuid, 0);
add_region(0, 0, lmb_end_of_DRAM() >> PAGE_SHIFT);
node_set_online(0);
init_node_data[0].node_start_pfn = 0;
init_node_data[0].node_end_pfn = lmb_end_of_DRAM() / PAGE_SIZE;
init_node_data[0].node_present_pages = total_ram / PAGE_SIZE;
for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT)
numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0;
}
static void __init dump_numa_topology(void)
......@@ -499,8 +509,9 @@ static void __init dump_numa_topology(void)
count = 0;
for (i = 0; i < lmb_end_of_DRAM(); i += MEMORY_INCREMENT) {
if (numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] == node) {
for (i = 0; i < lmb_end_of_DRAM();
i += (1 << SECTION_SIZE_BITS)) {
if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
if (count == 0)
printk(" 0x%lx", i);
++count;
......@@ -525,10 +536,12 @@ static void __init dump_numa_topology(void)
*
* Returns the physical address of the memory.
*/
static unsigned long careful_allocation(int nid, unsigned long size,
unsigned long align, unsigned long end)
static void __init *careful_allocation(int nid, unsigned long size,
unsigned long align,
unsigned long end_pfn)
{
unsigned long ret = lmb_alloc_base(size, align, end);
int new_nid;
unsigned long ret = lmb_alloc_base(size, align, end_pfn << PAGE_SHIFT);
/* retry over all memory */
if (!ret)
......@@ -542,28 +555,27 @@ static unsigned long careful_allocation(int nid, unsigned long size,
* If the memory came from a previously allocated node, we must
* retry with the bootmem allocator.
*/
if (pa_to_nid(ret) < nid) {
nid = pa_to_nid(ret);
ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(nid),
new_nid = early_pfn_to_nid(ret >> PAGE_SHIFT);
if (new_nid < nid) {
ret = (unsigned long)__alloc_bootmem_node(NODE_DATA(new_nid),
size, align, 0);
if (!ret)
panic("numa.c: cannot allocate %lu bytes on node %d",
size, nid);
size, new_nid);
ret = virt_to_abs(ret);
ret = __pa(ret);
dbg("alloc_bootmem %lx %lx\n", ret, size);
}
return ret;
return (void *)ret;
}
void __init do_init_bootmem(void)
{
int nid;
int addr_cells, size_cells;
struct device_node *memory = NULL;
unsigned int i;
static struct notifier_block ppc64_numa_nb = {
.notifier_call = cpu_numa_callback,
.priority = 1 /* Must run before sched domains notifier. */
......@@ -581,99 +593,66 @@ void __init do_init_bootmem(void)
register_cpu_notifier(&ppc64_numa_nb);
for_each_online_node(nid) {
unsigned long start_paddr, end_paddr;
int i;
unsigned long start_pfn, end_pfn, pages_present;
unsigned long bootmem_paddr;
unsigned long bootmap_pages;
start_paddr = init_node_data[nid].node_start_pfn * PAGE_SIZE;
end_paddr = init_node_data[nid].node_end_pfn * PAGE_SIZE;
get_region(nid, &start_pfn, &end_pfn, &pages_present);
/* Allocate the node structure node local if possible */
NODE_DATA(nid) = (struct pglist_data *)careful_allocation(nid,
NODE_DATA(nid) = careful_allocation(nid,
sizeof(struct pglist_data),
SMP_CACHE_BYTES, end_paddr);
NODE_DATA(nid) = abs_to_virt(NODE_DATA(nid));
SMP_CACHE_BYTES, end_pfn);
NODE_DATA(nid) = __va(NODE_DATA(nid));
memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
dbg("node %d\n", nid);
dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
NODE_DATA(nid)->bdata = &plat_node_bdata[nid];
NODE_DATA(nid)->node_start_pfn =
init_node_data[nid].node_start_pfn;
NODE_DATA(nid)->node_spanned_pages =
end_paddr - start_paddr;
NODE_DATA(nid)->node_start_pfn = start_pfn;
NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
if (NODE_DATA(nid)->node_spanned_pages == 0)
continue;
dbg("start_paddr = %lx\n", start_paddr);
dbg("end_paddr = %lx\n", end_paddr);
bootmap_pages = bootmem_bootmap_pages((end_paddr - start_paddr) >> PAGE_SHIFT);
dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
bootmem_paddr = careful_allocation(nid,
bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
bootmem_paddr = (unsigned long)careful_allocation(nid,
bootmap_pages << PAGE_SHIFT,
PAGE_SIZE, end_paddr);
memset(abs_to_virt(bootmem_paddr), 0,
bootmap_pages << PAGE_SHIFT);
PAGE_SIZE, end_pfn);
memset(__va(bootmem_paddr), 0, bootmap_pages << PAGE_SHIFT);
dbg("bootmap_paddr = %lx\n", bootmem_paddr);
init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
start_paddr >> PAGE_SHIFT,
end_paddr >> PAGE_SHIFT);
/*
* We need to do another scan of all memory sections to
* associate memory with the correct node.
*/
addr_cells = get_mem_addr_cells();
size_cells = get_mem_size_cells();
memory = NULL;
while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
unsigned long mem_start, mem_size;
int numa_domain, ranges;
unsigned int *memcell_buf;
unsigned int len;
memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
if (!memcell_buf || len <= 0)
continue;
start_pfn, end_pfn);
ranges = memory->n_addrs; /* ranges in cell */
new_range:
mem_start = read_n_cells(addr_cells, &memcell_buf);
mem_size = read_n_cells(size_cells, &memcell_buf);
if (numa_enabled) {
numa_domain = of_node_numa_domain(memory);
if (numa_domain >= MAX_NUMNODES)
numa_domain = 0;
} else
numa_domain = 0;
/* Add free regions on this node */
for (i = 0; init_node_data[i].end_pfn; i++) {
unsigned long start, end;
if (numa_domain != nid)
if (init_node_data[i].nid != nid)
continue;
mem_size = numa_enforce_memory_limit(mem_start, mem_size);
if (mem_size) {
dbg("free_bootmem %lx %lx\n", mem_start, mem_size);
free_bootmem_node(NODE_DATA(nid), mem_start, mem_size);
}
start = init_node_data[i].start_pfn << PAGE_SHIFT;
end = init_node_data[i].end_pfn << PAGE_SHIFT;
if (--ranges) /* process all ranges in cell */
goto new_range;
dbg("free_bootmem %lx %lx\n", start, end - start);
free_bootmem_node(NODE_DATA(nid), start, end - start);
}
/*
* Mark reserved regions on this node
*/
/* Mark reserved regions on this node */
for (i = 0; i < lmb.reserved.cnt; i++) {
unsigned long physbase = lmb.reserved.region[i].base;
unsigned long size = lmb.reserved.region[i].size;
unsigned long start_paddr = start_pfn << PAGE_SHIFT;
unsigned long end_paddr = end_pfn << PAGE_SHIFT;
if (pa_to_nid(physbase) != nid &&
pa_to_nid(physbase+size-1) != nid)
if (early_pfn_to_nid(physbase >> PAGE_SHIFT) != nid &&
early_pfn_to_nid((physbase+size-1) >> PAGE_SHIFT) != nid)
continue;
if (physbase < end_paddr &&
......@@ -693,46 +672,19 @@ void __init do_init_bootmem(void)
size);
}
}
/*
* This loop may look famaliar, but we have to do it again
* after marking our reserved memory to mark memory present
* for sparsemem.
*/
addr_cells = get_mem_addr_cells();
size_cells = get_mem_size_cells();
memory = NULL;
while ((memory = of_find_node_by_type(memory, "memory")) != NULL) {
unsigned long mem_start, mem_size;
int numa_domain, ranges;
unsigned int *memcell_buf;
unsigned int len;
memcell_buf = (unsigned int *)get_property(memory, "reg", &len);
if (!memcell_buf || len <= 0)
continue;
ranges = memory->n_addrs; /* ranges in cell */
new_range2:
mem_start = read_n_cells(addr_cells, &memcell_buf);
mem_size = read_n_cells(size_cells, &memcell_buf);
if (numa_enabled) {
numa_domain = of_node_numa_domain(memory);
if (numa_domain >= MAX_NUMNODES)
numa_domain = 0;
} else
numa_domain = 0;
/* Add regions into sparsemem */
for (i = 0; init_node_data[i].end_pfn; i++) {
unsigned long start, end;
if (numa_domain != nid)
if (init_node_data[i].nid != nid)
continue;
mem_size = numa_enforce_memory_limit(mem_start, mem_size);
memory_present(numa_domain, mem_start >> PAGE_SHIFT,
(mem_start + mem_size) >> PAGE_SHIFT);
start = init_node_data[i].start_pfn;
end = init_node_data[i].end_pfn;
if (--ranges) /* process all ranges in cell */
goto new_range2;
memory_present(nid, start, end);
}
}
}
......@@ -746,21 +698,18 @@ void __init paging_init(void)
memset(zholes_size, 0, sizeof(zholes_size));
for_each_online_node(nid) {
unsigned long start_pfn;
unsigned long end_pfn;
unsigned long start_pfn, end_pfn, pages_present;
start_pfn = init_node_data[nid].node_start_pfn;
end_pfn = init_node_data[nid].node_end_pfn;
get_region(nid, &start_pfn, &end_pfn, &pages_present);
zones_size[ZONE_DMA] = end_pfn - start_pfn;
zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] -
init_node_data[nid].node_present_pages;
zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - pages_present;
dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid,
zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]);
free_area_init_node(nid, NODE_DATA(nid), zones_size,
start_pfn, zholes_size);
free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn,
zholes_size);
}
}
......
......@@ -14,7 +14,6 @@
#include <asm/system.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/systemcfg.h>
#include <asm/rtas.h>
#include <asm/oprofile_impl.h>
#include <asm/reg.h>
......
......@@ -27,7 +27,6 @@
#include <asm/prom.h>
#include <asm/nvram.h>
#include <asm/atomic.h>
#include <asm/systemcfg.h>
#if 0
#define DEBUG(A...) printk(KERN_ERR A)
......
......@@ -306,9 +306,7 @@ static void __init fw_feature_init(void)
}
of_node_put(dn);
no_rtas:
printk(KERN_INFO "firmware_features = 0x%lx\n",
ppc64_firmware_features);
no_rtas:
DBG(" <- fw_feature_init()\n");
}
......
......@@ -46,7 +46,7 @@
#include <asm/rtas.h>
#include <asm/pSeries_reconfig.h>
#include <asm/mpic.h>
#include <asm/systemcfg.h>
#include <asm/vdso_datapage.h>
#include "plpar_wrappers.h"
......@@ -97,7 +97,7 @@ int pSeries_cpu_disable(void)
int cpu = smp_processor_id();
cpu_clear(cpu, cpu_online_map);
_systemcfg->processorCount--;
vdso_data->processorCount--;
/*fix boot_cpuid here*/
if (cpu == boot_cpuid)
......
......@@ -1467,17 +1467,23 @@ read_spr(int n)
{
unsigned int instrs[2];
unsigned long (*code)(void);
unsigned long opd[3];
unsigned long ret = -1UL;
#ifdef CONFIG_PPC64
unsigned long opd[3];
instrs[0] = 0x7c6002a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6);
instrs[1] = 0x4e800020;
opd[0] = (unsigned long)instrs;
opd[1] = 0;
opd[2] = 0;
code = (unsigned long (*)(void)) opd;
#else
code = (unsigned long (*)(void)) instrs;
#endif
/* mfspr r3,n; blr */
instrs[0] = 0x7c6002a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6);
instrs[1] = 0x4e800020;
store_inst(instrs);
store_inst(instrs+1);
code = (unsigned long (*)(void)) opd;
if (setjmp(bus_error_jmp) == 0) {
catch_memory_errors = 1;
......@@ -1499,16 +1505,21 @@ write_spr(int n, unsigned long val)
{
unsigned int instrs[2];
unsigned long (*code)(unsigned long);
#ifdef CONFIG_PPC64
unsigned long opd[3];
instrs[0] = 0x7c6003a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6);
instrs[1] = 0x4e800020;
opd[0] = (unsigned long)instrs;
opd[1] = 0;
opd[2] = 0;
code = (unsigned long (*)(unsigned long)) opd;
#else
code = (unsigned long (*)(unsigned long)) instrs;
#endif
instrs[0] = 0x7c6003a6 + ((n & 0x1F) << 16) + ((n & 0x3e0) << 6);
instrs[1] = 0x4e800020;
store_inst(instrs);
store_inst(instrs+1);
code = (unsigned long (*)(unsigned long)) opd;
if (setjmp(bus_error_jmp) == 0) {
catch_memory_errors = 1;
......
......@@ -25,6 +25,7 @@
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/vdso_datapage.h>
#define DEFINE(sym, val) \
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
......@@ -143,5 +144,32 @@ main(void)
DEFINE(TASK_SIZE, TASK_SIZE);
DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28);
/* datapage offsets for use by vdso */
DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp));
DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec));
DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs));
DEFINE(CFG_STAMP_XSEC, offsetof(struct vdso_data, stamp_xsec));
DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count));
DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest));
DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32));
DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));
DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec));
DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec));
DEFINE(TSPEC32_TV_SEC, offsetof(struct timespec, tv_sec));
DEFINE(TSPEC32_TV_NSEC, offsetof(struct timespec, tv_nsec));
/* timeval/timezone offsets for use by vdso */
DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
/* Other bits used by the vdso */
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
DEFINE(CLOCK_REALTIME_RES, TICK_NSEC);
return 0;
}
......@@ -37,7 +37,7 @@ static u_char irq_to_siureg[] = {
static u_char irq_to_siubit[] = {
0, 15, 14, 13, 12, 11, 10, 9,
8, 7, 6, 5, 4, 3, 2, 1,
2, 1, 15, 14, 13, 12, 11, 10,
2, 1, 0, 14, 13, 12, 11, 10,
9, 8, 7, 6, 5, 4, 3, 0,
31, 30, 29, 28, 27, 26, 25, 24,
23, 22, 21, 20, 19, 18, 17, 16,
......
......@@ -279,17 +279,12 @@ config ARCH_FLATMEM_ENABLE
def_bool y
depends on !NUMA
config ARCH_DISCONTIGMEM_ENABLE
def_bool y
depends on SMP && PPC_PSERIES
config ARCH_DISCONTIGMEM_DEFAULT
config ARCH_SPARSEMEM_ENABLE
def_bool y
depends on ARCH_DISCONTIGMEM_ENABLE
config ARCH_SPARSEMEM_ENABLE
config ARCH_SPARSEMEM_DEFAULT
def_bool y
depends on ARCH_DISCONTIGMEM_ENABLE
depends on NUMA
source "mm/Kconfig"
......
......@@ -13,10 +13,8 @@ endif
obj-y += idle.o dma.o \
align.o \
udbg.o \
rtc.o \
iommu.o vdso.o
obj-y += vdso32/ vdso64/
iommu.o
pci-obj-$(CONFIG_PPC_MULTIPLATFORM) += pci_dn.o pci_direct_iommu.o
......@@ -27,8 +25,6 @@ ifneq ($(CONFIG_PPC_MERGE),y)
obj-$(CONFIG_PPC_MULTIPLATFORM) += prom_init.o
endif
obj-$(CONFIG_PPC_PSERIES) += udbg_16550.o
obj-$(CONFIG_KEXEC) += machine_kexec.o
obj-$(CONFIG_MODULES) += module.o
ifneq ($(CONFIG_PPC_MERGE),y)
......@@ -40,10 +36,6 @@ obj-$(CONFIG_BOOTX_TEXT) += btext.o
endif
obj-$(CONFIG_HVCS) += hvcserver.o
obj-$(CONFIG_PPC_PMAC) += udbg_scc.o
obj-$(CONFIG_PPC_MAPLE) += udbg_16550.o
obj-$(CONFIG_KPROBES) += kprobes.o
ifneq ($(CONFIG_PPC_MERGE),y)
......
......@@ -186,7 +186,8 @@ _KPROBE(__flush_icache_range)
bdnz 2b
isync
blr
.previous .text
.text
/*
* Like above, but only do the D-cache.
*
......
......@@ -14,8 +14,6 @@
/* The vDSO location. We have to use the same value as x86 for glibc's
* sake :-)
*/
#ifdef __powerpc64__
#define AT_SYSINFO_EHDR 33
#endif
#endif
......@@ -269,14 +269,12 @@ extern int dcache_bsize;
extern int icache_bsize;
extern int ucache_bsize;
#ifdef __powerpc64__
/* vDSO has arch_setup_additional_pages */
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
struct linux_binprm;
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES /* vDSO has arch_setup_additional_pages */
extern int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack);
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
int executable_stack);
#define VDSO_AUX_ENT(a,b) NEW_AUX_ENT(a,b);
#else
#define VDSO_AUX_ENT(a,b)
#endif /* __powerpc64__ */
/*
* The requirements here are:
......
......@@ -389,6 +389,7 @@ extern u64 ppc64_interrupt_controller;
#define SIU_INT_TIMER4 ((uint)0x0f + CPM_IRQ_OFFSET)
#define SIU_INT_TMCNT ((uint)0x10 + CPM_IRQ_OFFSET)
#define SIU_INT_PIT ((uint)0x11 + CPM_IRQ_OFFSET)
#define SIU_INT_PCI ((uint)0x12 + CPM_IRQ_OFFSET)
#define SIU_INT_IRQ1 ((uint)0x13 + CPM_IRQ_OFFSET)
#define SIU_INT_IRQ2 ((uint)0x14 + CPM_IRQ_OFFSET)
#define SIU_INT_IRQ3 ((uint)0x15 + CPM_IRQ_OFFSET)
......
......@@ -177,8 +177,8 @@ struct thread_struct {
#ifdef CONFIG_PPC64
unsigned long start_tb; /* Start purr when proc switched in */
unsigned long accum_tb; /* Total accumilated purr for process */
unsigned long vdso_base; /* base of the vDSO library */
#endif
unsigned long vdso_base; /* base of the vDSO library */
unsigned long dabr; /* Data address breakpoint register */
#ifdef CONFIG_ALTIVEC
/* Complete AltiVec register set */
......
......@@ -8,8 +8,8 @@
* MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space
*/
#define SECTION_SIZE_BITS 24
#define MAX_PHYSADDR_BITS 38
#define MAX_PHYSMEM_BITS 36
#define MAX_PHYSADDR_BITS 44
#define MAX_PHYSMEM_BITS 44
#ifdef CONFIG_MEMORY_HOTPLUG
extern void create_section_mapping(unsigned long start, unsigned long end);
......
......@@ -9,15 +9,7 @@
static inline int cpu_to_node(int cpu)
{
int node;
node = numa_cpu_lookup_table[cpu];
#ifdef DEBUG_NUMA
BUG_ON(node == -1);
#endif
return node;
return numa_cpu_lookup_table[cpu];
}
#define parent_node(node) (node)
......@@ -37,8 +29,6 @@ static inline int node_to_first_cpu(int node)
#define pcibus_to_node(node) (-1)
#define pcibus_to_cpumask(bus) (cpu_online_map)
#define nr_cpus_node(node) (nr_cpus_in_node[node])
/* sched_domains SD_NODE_INIT for PPC64 machines */
#define SD_NODE_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
......
#ifndef __UDBG_HDR
#define __UDBG_HDR
#include <linux/compiler.h>
#include <linux/init.h>
/*
* c 2001 PPC 64 Team, IBM Corp
*
......@@ -13,6 +7,12 @@
* 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_UDBG_H
#define _ASM_POWERPC_UDBG_H
#include <linux/compiler.h>
#include <linux/init.h>
extern void (*udbg_putc)(unsigned char c);
extern unsigned char (*udbg_getc)(void);
extern int (*udbg_getc_poll)(void);
......@@ -28,4 +28,4 @@ extern void udbg_init_uart(void __iomem *comport, unsigned int speed);
struct device_node;
extern void udbg_init_scc(struct device_node *np);
#endif
#endif /* _ASM_POWERPC_UDBG_H */
#ifndef _SYSTEMCFG_H
#define _SYSTEMCFG_H
#ifndef _VDSO_DATAPAGE_H
#define _VDSO_DATAPAGE_H
/*
* Copyright (C) 2002 Peter Bergner <bergner@vnet.ibm.com>, IBM
* Copyright (C) 2005 Benjamin Herrenschmidy <benh@kernel.crashing.org>,
* IBM Corp.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
......@@ -10,9 +12,20 @@
* 2 of the License, or (at your option) any later version.
*/
/* Change Activity:
* 2002/09/30 : bergner : Created
* End Change Activity
/*
* Note about this structure:
*
* This structure was historically called systemcfg and exposed to
* userland via /proc/ppc64/systemcfg. Unfortunately, this became an
* ABI issue as some proprietary software started relying on being able
* to mmap() it, thus we have to keep the base layout at least for a
* few kernel versions.
*
* However, since ppc32 doesn't suffer from this backward handicap,
* a simpler version of the data structure is used there with only the
* fields actually used by the vDSO.
*
*/
/*
......@@ -28,7 +41,13 @@
#define SYSCALL_MAP_SIZE ((__NR_syscalls + 31) / 32)
struct systemcfg {
/*
* So here is the ppc64 backward compatible version
*/
#ifdef CONFIG_PPC64
struct vdso_data {
__u8 eye_catcher[16]; /* Eyecatcher: SYSTEMCFG:PPC64 0x00 */
struct { /* Systemcfg version numbers */
__u32 major; /* Major number 0x10 */
......@@ -46,17 +65,42 @@ struct systemcfg {
__u64 tb_update_count; /* Timebase atomicity ctr 0x50 */
__u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */
__u32 tz_dsttime; /* Type of dst correction 0x5C */
/* next four are no longer used except to be exported to /proc */
__u32 dcache_size; /* L1 d-cache size 0x60 */
__u32 dcache_line_size; /* L1 d-cache line size 0x64 */
__u32 icache_size; /* L1 i-cache size 0x68 */
__u32 icache_line_size; /* L1 i-cache line size 0x6C */
__u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of available syscalls 0x70 */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of available syscalls */
/* those additional ones don't have to be located anywhere
* special as they were not part of the original systemcfg
*/
__s64 wtom_clock_sec; /* Wall to monotonic clock */
__s32 wtom_clock_nsec;
__u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
};
#else /* CONFIG_PPC64 */
/*
* And here is the simpler 32 bits version
*/
struct vdso_data {
__u64 tb_orig_stamp; /* Timebase at boot 0x30 */
__u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */
__u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */
__u64 stamp_xsec; /* 0x48 */
__u32 tb_update_count; /* Timebase atomicity ctr 0x50 */
__u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */
__u32 tz_dsttime; /* Type of dst correction 0x5C */
__s32 wtom_clock_sec; /* Wall to monotonic clock */
__s32 wtom_clock_nsec;
__u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
};
#endif /* CONFIG_PPC64 */
#ifdef __KERNEL__
extern struct systemcfg *_systemcfg; /* to be renamed */
extern struct vdso_data *vdso_data;
#endif
#endif /* __ASSEMBLY__ */
......
#ifndef _PPC_PAGE_H
#define _PPC_PAGE_H
#include <linux/config.h>
#include <asm/asm-compat.h>
/* PAGE_SHIFT determines the page size */
#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
/*
* Subtle: this is an int (not an unsigned long) and so it
......@@ -169,5 +172,8 @@ extern __inline__ int get_order(unsigned long size)
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
/* We do define AT_SYSINFO_EHDR but don't use the gate mecanism */
#define __HAVE_ARCH_GATE_AREA 1
#endif /* __KERNEL__ */
#endif /* _PPC_PAGE_H */
......@@ -8,15 +8,14 @@
#define _ASM_MMZONE_H_
#include <linux/config.h>
#include <asm/smp.h>
/* generic non-linear memory support:
/*
* generic non-linear memory support:
*
* 1) we will not split memory into more chunks than will fit into the
* flags field of the struct page
*/
#ifdef CONFIG_NEED_MULTIPLE_NODES
extern struct pglist_data *node_data[];
......@@ -30,44 +29,11 @@ extern struct pglist_data *node_data[];
*/
extern int numa_cpu_lookup_table[];
extern char *numa_memory_lookup_table;
extern cpumask_t numa_cpumask_lookup_table[];
extern int nr_cpus_in_node[];
#ifdef CONFIG_MEMORY_HOTPLUG
extern unsigned long max_pfn;
#endif
/* 16MB regions */
#define MEMORY_INCREMENT_SHIFT 24
#define MEMORY_INCREMENT (1UL << MEMORY_INCREMENT_SHIFT)
/* NUMA debugging, will not work on a DLPAR machine */
#undef DEBUG_NUMA
static inline int pa_to_nid(unsigned long pa)
{
int nid;
#ifdef CONFIG_MEMORY_HOTPLUG
/* kludge hot added sections default to node 0 */
if (pa >= (max_pfn << PAGE_SHIFT))
return 0;
#endif
nid = numa_memory_lookup_table[pa >> MEMORY_INCREMENT_SHIFT];
#ifdef DEBUG_NUMA
/* the physical address passed in is not in the map for the system */
if (nid == -1) {
printk("bad address: %lx\n", pa);
BUG();
}
#endif
return nid;
}
#define node_localnr(pfn, nid) ((pfn) - NODE_DATA(nid)->node_start_pfn)
/*
* Following are macros that each numa implmentation must define.
*/
......@@ -75,39 +41,10 @@ static inline int pa_to_nid(unsigned long pa)
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
#define node_end_pfn(nid) (NODE_DATA(nid)->node_end_pfn)
#ifdef CONFIG_DISCONTIGMEM
/*
* Given a kernel address, find the home node of the underlying memory.
*/
#define kvaddr_to_nid(kaddr) pa_to_nid(__pa(kaddr))
#define pfn_to_nid(pfn) pa_to_nid((unsigned long)(pfn) << PAGE_SHIFT)
/* Written this way to avoid evaluating arguments twice */
#define discontigmem_pfn_to_page(pfn) \
({ \
unsigned long __tmp = pfn; \
(NODE_DATA(pfn_to_nid(__tmp))->node_mem_map + \
node_localnr(__tmp, pfn_to_nid(__tmp))); \
})
#define discontigmem_page_to_pfn(p) \
({ \
struct page *__tmp = p; \
(((__tmp) - page_zone(__tmp)->zone_mem_map) + \
page_zone(__tmp)->zone_start_pfn); \
})
/* XXX fix for discontiguous physical memory */
#define discontigmem_pfn_valid(pfn) ((pfn) < num_physpages)
#endif /* CONFIG_DISCONTIGMEM */
#endif /* CONFIG_NEED_MULTIPLE_NODES */
#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
#define early_pfn_to_nid(pfn) pa_to_nid(((unsigned long)pfn) << PAGE_SHIFT)
extern int __init early_pfn_to_nid(unsigned long pfn);
#endif
#endif /* _ASM_MMZONE_H_ */
......@@ -279,11 +279,6 @@ extern u64 ppc64_pft_size; /* Log 2 of page table size */
#define __va(x) ((void *)((unsigned long)(x) + KERNELBASE))
#ifdef CONFIG_DISCONTIGMEM
#define page_to_pfn(page) discontigmem_page_to_pfn(page)
#define pfn_to_page(pfn) discontigmem_pfn_to_page(pfn)
#define pfn_valid(pfn) discontigmem_pfn_valid(pfn)
#endif
#ifdef CONFIG_FLATMEM
#define pfn_to_page(pfn) (mem_map + (pfn))
#define page_to_pfn(page) ((unsigned long)((page) - mem_map))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment