Commit d8f19f2c authored by Andi Kleen, committed by Vojtech Pavlik

[PATCH] x86-64 merge

This brings the x86-64 port up to date with 2.5.60. Unfortunately I cannot
test much because I constantly get deadlocks in exit/wait in initscripts
on SMP bootup. The kernel still seems to lose a lot of SIGCHLDs; 2.5.59/SMP
had the same problem. Uniprocessor kernels, and the SMP kernel on UP, seem to work.

This patch only touches x86-64-specific files. It requires a few simple
changes to arch-independent files that I will send separately.

 - Fixed a lot of obsolete/misleading configure help texts.
 - Remove old bootblock disk loader and support fdimage target for syslinux
   instead (H. Peter Anvin)
 - Fix potential FPU signal restore problem in 32bit emulation.
 - Merge with 2.5.60 i386 (hugetlbfs, acpi etc.)
 - Some fixes for local APIC disabled mode.
 - Beginnings of S3 ACPI wakeup from real mode (not working yet, don't use)
 - Beginnings of NUMA/CONFIG_DISCONTIGMEM support for AMD K8 (work in progress,
   port from 2.4): clean up memory mapping at bootup, generalize bootmem etc.
 - Fix 64bit GS base reload problem and reenable (Karsten Keil)
 - Fix race where vmalloc accesses from interrupt handlers disturb page faults,
   and a similar race for the debug handler (thanks to Andrew Morton)
 - Merge cpu access primitives with i386
 - Revert to private module list for now because putting modules
   into vmlist triggered too many problems.
 - Some cleanups, removal of unneeded code.
 - Let early __get_free_pages see a consistent PDA
 - Preempt disabled for now because it is too broken
 - Signal handler fixes
 - Fix do_gettimeofday to be completely lockless and reenable vsyscalls (see the seqlock sketch after this list)
 - Optimize context switch path a bit (should be ported to i386)
 - Get thread_info via stack for better code (see the sketch after this list)
 - Don't leak pmd pages
 - Clean up hardcoded task stack sizes.
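
For the lockless do_gettimeofday: the reader side simply retries until it
observes an even, unchanged sequence count. Below is a minimal userspace model
of that seqlock pattern (an illustrative sketch with simplified memory
ordering, not the kernel code; the kernel uses read_seqbegin()/read_seqretry()
on xtime_lock, and write_time/read_time here are hypothetical names):

    /* seqlock_model.c - writer bumps seq to odd, updates, bumps back to even;
     * readers loop until they see a stable even sequence. */
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_uint seq;                /* even = stable, odd = write in progress */
    static unsigned long tv_sec, tv_usec;  /* the protected data */

    static void write_time(unsigned long s, unsigned long us)
    {
            atomic_fetch_add(&seq, 1);     /* odd: writer active */
            tv_sec = s;
            tv_usec = us;
            atomic_fetch_add(&seq, 1);     /* even: data stable again */
    }

    static void read_time(unsigned long *s, unsigned long *us)
    {
            unsigned start;
            do {
                    start = atomic_load(&seq);        /* cf. read_seqbegin() */
                    *s = tv_sec;
                    *us = tv_usec;
            } while ((start & 1) ||                   /* writer was active...    */
                     atomic_load(&seq) != start);     /* ...or data changed: retry */
    }

    int main(void)
    {
            unsigned long s, us;
            write_time(1044, 500000);
            read_time(&s, &us);
            printf("%lu.%06lu\n", s, us);
            return 0;
    }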
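For "thread_info via stack": the kernel stack is THREAD_SIZE-aligned with
thread_info at its bottom, so it can be located by masking the stack pointer
instead of going through the PDA. A hedged sketch of the idea (the real helper
lives in the x86-64 thread_info.h; this is a paraphrase, not the patch itself):

    /* round the stack pointer down to the base of the current kernel stack */
    static inline struct thread_info *stack_thread_info(void)
    {
            unsigned long rsp;
            asm("movq %%rsp,%0" : "=r" (rsp));
            /* assumes THREAD_SIZE is a power of two and stacks are aligned to it */
            return (struct thread_info *)(rsp & ~(THREAD_SIZE - 1));
    }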
parent 3ab054ff
......@@ -58,7 +58,8 @@ drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/
boot := arch/x86_64/boot
.PHONY: bzImage bzlilo bzdisk install archmrproper
.PHONY: bzImage bzlilo install archmrproper \
fdimage fdimage144 fdimage288 archclean
#Default target when executing "make"
all: bzImage
......@@ -74,7 +75,7 @@ bzlilo: vmlinux
bzdisk: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zdisk
install: vmlinux
install fdimage fdimage144 fdimage288: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@
archclean:
......@@ -103,3 +104,6 @@ define archhelp
echo ' install to $$(INSTALL_PATH) and run lilo'
endef
CLEAN_FILES += arch/$(ARCH)/boot/fdimage arch/$(ARCH)/boot/mtools.conf
......@@ -59,8 +59,36 @@ $(obj)/setup $(obj)/bootsect: %: %.o FORCE
$(obj)/compressed/vmlinux: FORCE
$(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@
zdisk: $(BOOTIMAGE)
dd bs=8192 if=$(BOOTIMAGE) of=/dev/fd0
# Set this if you want to pass append arguments to the zdisk/fdimage kernel
FDARGS =
$(obj)/mtools.conf: $(src)/mtools.conf.in
sed -e 's|@OBJ@|$(obj)|g' < $< > $@
# This requires write access to /dev/fd0
zdisk: $(BOOTIMAGE) $(obj)/mtools.conf
MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync
syslinux /dev/fd0 ; sync
echo 'default linux $(FDARGS)' | \
MTOOLSRC=$(obj)/mtools.conf mcopy - a:syslinux.cfg
MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) a:linux ; sync
# These require being root or having syslinux run setuid
fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf
dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440
MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync
syslinux $(obj)/fdimage ; sync
echo 'default linux $(FDARGS)' | \
MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg
MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) v:linux ; sync
fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf
dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880
MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync
syslinux $(obj)/fdimage ; sync
echo 'default linux $(FDARGS)' | \
MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg
MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) w:linux ; sync
zlilo: $(BOOTIMAGE)
if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi
......
#
# mtools configuration file for "make (b)zdisk"
#
# Actual floppy drive
drive a:
file="/dev/fd0"
# 1.44 MB floppy disk image
drive v:
file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=18 filter
# 2.88 MB floppy disk image (mostly for virtual uses)
drive w:
file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=36 filter
......@@ -150,13 +150,10 @@ int main(int argc, char ** argv)
sz = sb.st_size;
fprintf (stderr, "System is %d kB\n", sz/1024);
sys_size = (sz + 15) / 16;
/* 0x28000*16 = 2.5 MB, conservative estimate for the current maximum */
if (sys_size > (is_big_kernel ? 0x28000 : DEF_SYSSIZE))
/* 0x40000*16 = 4.0 MB, reasonable estimate for the current maximum */
if (sys_size > (is_big_kernel ? 0x40000 : DEF_SYSSIZE))
die("System is too big. Try using %smodules.",
is_big_kernel ? "" : "bzImage or ");
if (sys_size > 0xefff)
fprintf(stderr,"warning: kernel is too big for standalone boot "
"from floppy\n");
while (sz > 0) {
int l, n;
......
......@@ -5,7 +5,6 @@ CONFIG_X86_64=y
CONFIG_X86=y
CONFIG_MMU=y
CONFIG_SWAP=y
CONFIG_UID16=y
CONFIG_RWSEM_GENERIC_SPINLOCK=y
CONFIG_X86_CMPXCHG=y
CONFIG_EARLY_PRINTK=y
......@@ -22,12 +21,6 @@ CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
# CONFIG_BSD_PROCESS_ACCT is not set
CONFIG_SYSCTL=y
# CONFIG_LOG_BUF_SHIFT_17 is not set
CONFIG_LOG_BUF_SHIFT_16=y
# CONFIG_LOG_BUF_SHIFT_15 is not set
# CONFIG_LOG_BUF_SHIFT_14 is not set
# CONFIG_LOG_BUF_SHIFT_13 is not set
# CONFIG_LOG_BUF_SHIFT_12 is not set
CONFIG_LOG_BUF_SHIFT=16
#
......@@ -37,6 +30,7 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_OBSOLETE_MODPARM=y
# CONFIG_MODVERSIONS is not set
# CONFIG_KMOD is not set
#
......@@ -103,6 +97,7 @@ CONFIG_BINFMT_ELF=y
# CONFIG_BINFMT_MISC is not set
CONFIG_IA32_EMULATION=y
CONFIG_COMPAT=y
CONFIG_UID16=y
#
# Memory Technology Devices (MTD)
......@@ -290,6 +285,7 @@ CONFIG_NETDEVICES=y
# Ethernet (10 or 100Mbit)
#
CONFIG_NET_ETHERNET=y
# CONFIG_MII is not set
# CONFIG_HAPPYMEAL is not set
# CONFIG_SUNGEM is not set
# CONFIG_NET_VENDOR_3COM is not set
......@@ -490,6 +486,7 @@ CONFIG_RTC=y
# CONFIG_DRM is not set
# CONFIG_MWAVE is not set
CONFIG_RAW_DRIVER=y
# CONFIG_HANGCHECK_TIMER is not set
#
# Misc devices
......@@ -615,7 +612,6 @@ CONFIG_DEBUG_KERNEL=y
# CONFIG_DEBUG_SLAB is not set
CONFIG_MAGIC_SYSRQ=y
# CONFIG_DEBUG_SPINLOCK is not set
CONFIG_CHECKING=y
# CONFIG_INIT_DEBUG is not set
CONFIG_KALLSYMS=y
# CONFIG_FRAME_POINTER is not set
......
......@@ -146,6 +146,7 @@ int restore_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 *buf, int fs
return -1;
}
tsk->thread.i387.fxsave.mxcsr &= 0xffbf;
current->used_math = 1;
return convert_fxsr_from_user(&tsk->thread.i387.fxsave, buf);
}
......
......@@ -450,7 +450,7 @@ ia32_sys_call_table:
.quad sys32_io_getevents
.quad sys32_io_submit
.quad sys_io_cancel
.quad sys_ni_syscall /* 250 alloc_huge_pages */
.quad sys_fadvise64
.quad sys_ni_syscall /* free_huge_pages */
.quad sys_exit_group /* exit_group */
.quad sys_lookup_dcookie
......
......@@ -17,7 +17,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o suspend_asm.o
obj-$(CONFIG_ACPI) += acpi.o
#obj-$(CONFIG_ACPI_SLEEP) += acpi_wakeup.o
obj-$(CONFIG_ACPI_SLEEP) += wakeup.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o
......
......@@ -44,6 +44,9 @@
#include <asm/pgalloc.h>
#include <asm/io_apic.h>
#include <asm/proto.h>
#include <asm/desc.h>
#include <asm/system.h>
#include <asm/segment.h>
extern int acpi_disabled;
......@@ -70,7 +73,6 @@ __acpi_map_table (
if (phys_addr < (end_pfn_map << PAGE_SHIFT))
return __va(phys_addr);
printk("acpi mapping beyond end_pfn: %lx > %lx\n", phys_addr, end_pfn<<PAGE_SHIFT);
return NULL;
}
......@@ -292,8 +294,7 @@ acpi_find_rsdp (void)
int __init
acpi_boot_init (
char *cmdline)
acpi_boot_init (void)
{
int result = 0;
......@@ -306,7 +307,7 @@ acpi_boot_init (
/*
* Initialize the ACPI boot-time table parser.
*/
result = acpi_table_init(cmdline);
result = acpi_table_init();
if (result)
return result;
......@@ -441,95 +442,41 @@ acpi_boot_init (
#ifdef CONFIG_ACPI_SLEEP
#error not ported to x86-64 yet
#ifdef DEBUG
#include <linux/serial.h>
#endif
extern void acpi_prepare_wakeup(void);
extern unsigned char acpi_wakeup[], acpi_wakeup_end[], s3_prot16[];
/* address in low memory of the wakeup routine. */
unsigned long acpi_wakeup_address = 0;
/* new page directory that we will be using */
static pmd_t *pmd;
/* saved page directory */
static pmd_t saved_pmd;
/* page which we'll use for the new page directory */
static pte_t *ptep;
extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long));
/*
* acpi_create_identity_pmd
*
* Create a new, identity mapped pmd.
*
* Do this by creating new page directory, and marking all the pages as R/W
* Then set it as the new Page Middle Directory.
* And, of course, flush the TLB so it takes effect.
*
* We save the address of the old one, for later restoration.
*/
static void acpi_create_identity_pmd (void)
{
pgd_t *pgd;
int i;
ptep = (pte_t*)__get_free_page(GFP_KERNEL);
/* fill page with low mapping */
for (i = 0; i < PTRS_PER_PTE; i++)
set_pte(ptep + i, mk_pte_phys(i << PAGE_SHIFT, PAGE_SHARED));
pgd = pgd_offset(current->active_mm, 0);
pmd = pmd_alloc(current->mm,pgd, 0);
/* save the old pmd */
saved_pmd = *pmd;
/* set the new one */
set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(ptep)));
/* flush the TLB */
local_flush_tlb();
}
/*
* acpi_restore_pmd
*
* Restore the old pmd saved by acpi_create_identity_pmd and
* free the page that said function alloc'd
*/
static void acpi_restore_pmd (void)
{
set_pmd(pmd, saved_pmd);
local_flush_tlb();
free_page((unsigned long)ptep);
}
unsigned long acpi_wakeup_address;
/**
* acpi_save_state_mem - save kernel state
*
* Create an identity mapped page table and copy the wakeup routine to
* low memory.
*/
int acpi_save_state_mem (void)
{
acpi_create_identity_pmd();
acpi_copy_wakeup_routine(acpi_wakeup_address);
if (!acpi_wakeup_address)
return -1;
memcpy((void*)acpi_wakeup_address, acpi_wakeup, acpi_wakeup_end - acpi_wakeup);
return 0;
}
/**
* acpi_save_state_disk - save kernel state to disk
*
* Assume preemption/interrupts are already turned off and that we're running
* on the BP (note this doesn't imply SMP is handled correctly)
*/
int acpi_save_state_disk (void)
{
unsigned long pbase = read_cr3() & PAGE_MASK;
if (pbase >= 0xffffffffUL) {
printk(KERN_ERR "ACPI: High page table. Suspend disabled.\n");
return 1;
}
set_seg_base(smp_processor_id(), GDT_ENTRY_KERNELCS16, s3_prot16);
swap_low_mappings();
acpi_prepare_wakeup();
return 0;
}
/*
......@@ -537,13 +484,13 @@ int acpi_save_state_disk (void)
*/
void acpi_restore_state_mem (void)
{
acpi_restore_pmd();
swap_low_mappings();
}
/**
* acpi_reserve_bootmem - do _very_ early ACPI initialisation
*
* We allocate a page in low memory for the wakeup
* We allocate a page in low memory (below 1MB) for the real-mode wakeup
* routine for when we come back from a sleep state. The
* runtime allocator allows specification of <16M pages, but not
* <1M pages.
......@@ -551,7 +498,10 @@ void acpi_restore_state_mem (void)
void __init acpi_reserve_bootmem(void)
{
acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
printk(KERN_DEBUG "ACPI: have wakeup address 0x%8.8lx\n", acpi_wakeup_address);
if (!acpi_wakeup_address) {
printk(KERN_ERR "ACPI: Cannot allocate lowmem. S3 disabled.\n");
return;
}
}
#endif /*CONFIG_ACPI_SLEEP*/
......
......@@ -57,7 +57,7 @@ static u32 __init allocate_aperture(void)
printk("Cannot allocate aperture memory hole (%p,%uK)\n",
p, aper_size>>10);
if (p)
free_bootmem((unsigned long)p, aper_size);
free_bootmem_node(nd0, (unsigned long)p, aper_size);
return 0;
}
printk("Mapping aperture over %d KB of RAM @ %lx\n",
......
......@@ -1026,7 +1026,7 @@ asmlinkage void smp_error_interrupt(void)
irq_exit();
}
int disable_apic __initdata;
int disable_apic;
/*
* This initializes the IO-APIC and APIC hardware if this is
......@@ -1038,8 +1038,10 @@ int __init APIC_init_uniprocessor (void)
printk(KERN_INFO "Apic disabled\n");
return -1;
}
if (!smp_found_config && !cpu_has_apic)
if (!smp_found_config && !cpu_has_apic) {
disable_apic = 1;
return -1;
}
/*
* Complain if the BIOS pretends there is one.
......@@ -1047,6 +1049,7 @@ int __init APIC_init_uniprocessor (void)
if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_id])) {
printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
boot_cpu_id);
disable_apic = 1;
return -1;
}
......
/*
* arch/x86_64/kernel/bluesmoke.c - x86-64 Machine Check Exception Reporting
*
RED-PEN: need to add power management to restore after S3 wakeup.
*/
#include <linux/init.h>
......
......@@ -19,13 +19,17 @@
#include <asm/proto.h>
#include <asm/bootsetup.h>
extern unsigned long table_start, table_end;
extern char _end[];
/*
* PFN of last memory page.
*/
unsigned long end_pfn;
/*
* end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
* The direct mapping extends to end_pfn_map, so that we can directly access
* ACPI and other tables without having to play with fixmaps.
* apertures, ACPI and other tables without having to play with fixmaps.
*/
unsigned long end_pfn_map;
......@@ -42,18 +46,16 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
unsigned long addr = *addrp, last = addr + size;
/* various gunk below that needed for SMP startup */
if (addr < 7*PAGE_SIZE) {
*addrp = 7*PAGE_SIZE;
if (addr < 0x8000) {
*addrp = 0x8000;
return 1;
}
#if 0
/* direct mapping tables of the kernel */
if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
*addrp = table_end << PAGE_SHIFT;
return 1;
}
#endif
/* initrd */
#ifdef CONFIG_BLK_DEV_INITRD
......@@ -145,10 +147,10 @@ void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned lon
/*
* Find the highest page frame number we have available
*/
void __init e820_end_of_ram(void)
unsigned long __init e820_end_of_ram(void)
{
int i;
end_pfn = 0;
unsigned long end_pfn = 0;
for (i = 0; i < e820.nr_map; i++) {
struct e820entry *ei = &e820.map[i];
......@@ -175,6 +177,8 @@ void __init e820_end_of_ram(void)
end_pfn = end_user_pfn;
if (end_pfn > end_pfn_map)
end_pfn = end_pfn_map;
return end_pfn;
}
/*
......
......@@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/string.h>
#include <asm/io.h>
#include <asm/processor.h>
/* Simple VGA output */
......@@ -104,9 +105,9 @@ static __init void early_serial_init(char *opt)
s = strsep(&opt, ",");
if (s != NULL) {
unsigned port;
if (!strncmp(s,"0x",2))
if (!strncmp(s,"0x",2)) {
early_serial_base = simple_strtoul(s, &e, 16);
else {
} else {
static int bases[] = { 0x3f8, 0x2f8 };
if (!strncmp(s,"ttyS",4))
s+=4;
......
......@@ -512,8 +512,7 @@ ENTRY(spurious_interrupt)
* Exception entry point. This expects an error code/orig_rax on the stack
* and the exception handler in %rax.
*/
ALIGN
error_entry:
ENTRY(error_entry)
/* rdi slot contains rax, oldrax contains error code */
pushq %rsi
movq 8(%rsp),%rsi /* load rax */
......@@ -532,10 +531,7 @@ error_swapgs:
xorl %ebx,%ebx
swapgs
error_sti:
bt $9,EFLAGS(%rsp)
jnc 1f
sti
1: movq %rdi,RDI(%rsp)
movq %rdi,RDI(%rsp)
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp)
......@@ -573,7 +569,8 @@ ENTRY(load_gs_index)
swapgs
gs_change:
movl %edi,%gs
2: swapgs
2: sfence /* workaround */
swapgs
popf
ret
......
......@@ -72,8 +72,7 @@ startup_32:
/* Setup EFER (Extended Feature Enable Register) */
movl $MSR_EFER, %ecx
rdmsr
/* Fool rdmsr and reset %eax to avoid dependences */
xorl %eax, %eax
/* Enable Long Mode */
btsl $_EFER_LME, %eax
/* Enable System Call */
......@@ -112,7 +111,6 @@ reach_compatibility_mode:
jnz second
/* Load new GDT with the 64bit segment using 32bit descriptor */
/* to avoid 32bit relocations we use fixed addresses here */
movl $(pGDT32 - __START_KERNEL_map), %eax
lgdt (%eax)
......@@ -349,17 +347,14 @@ ENTRY(cpu_gdt_table)
.quad 0x00cffe000000ffff /* __USER32_CS */
.quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */
.quad 0x00affa000000ffff /* __USER_CS */
.word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb)
.word 0 # base address = 0
.word 0x9A00 # code read/exec
.word 0x00CF # granularity = 4096, 386
# (+5th nibble of limit)
/* __KERNEL32_CS */
.quad 0x00cf9a000000ffff /* __KERNEL32_CS */
.quad 0,0 /* TSS */
.quad 0 /* LDT */
.quad 0,0,0 /* three TLS descriptors */
.quad 0x00cff2000000ffff /* dummy descriptor for long base */
.quad 0 /* pad to cache line boundary */
.quad 0 /* unused now */
.quad 0x00009a000000ffff /* __KERNEL16_CS - 16bit PM for S3 wakeup. */
/* base must be patched for real base address. */
/* This should be a multiple of the cache line size */
gdt_end:
.globl gdt_end
......
......@@ -13,6 +13,8 @@
#include <linux/string.h>
#include <asm/processor.h>
#include <asm/proto.h>
#include <asm/smp.h>
/* Don't add a printk in there. printk relies on the PDA which is not initialized
yet. */
......@@ -70,9 +72,6 @@ static void __init setup_boot_cpu_data(void)
boot_cpu_data.x86_mask = eax & 0xf;
}
extern void start_kernel(void), pda_init(int), setup_early_printk(char *);
extern int disable_apic;
void __init x86_64_start_kernel(char * real_mode_data)
{
char *s;
......@@ -83,6 +82,11 @@ void __init x86_64_start_kernel(char * real_mode_data)
s = strstr(saved_command_line, "earlyprintk=");
if (s != NULL)
setup_early_printk(s+12);
#ifdef CONFIG_DISCONTIGMEM
s = strstr(saved_command_line, "numa=");
if (s != NULL)
numa_setup(s+5);
#endif
#ifdef CONFIG_X86_IO_APIC
if (strstr(saved_command_line, "disableapic"))
disable_apic = 1;
......
......@@ -11,6 +11,7 @@
static struct fs_struct init_fs = INIT_FS;
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
/*
......
......@@ -137,7 +137,8 @@ int show_interrupts(struct seq_file *p, void *v)
struct irqaction * action;
seq_printf(p, " ");
for_each_cpu(j)
for (j=0; j<NR_CPUS; j++)
if (cpu_online(j))
seq_printf(p, "CPU%d ",j);
seq_putc(p, '\n');
......@@ -149,7 +150,8 @@ int show_interrupts(struct seq_file *p, void *v)
#ifndef CONFIG_SMP
seq_printf(p, "%10u ", kstat_irqs(i));
#else
for_each_cpu(j)
for (j=0; j<NR_CPUS; j++)
if (cpu_online(j))
seq_printf(p, "%10u ",
kstat_cpu(j).irqs[i]);
#endif
......@@ -161,12 +163,14 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
}
seq_printf(p, "NMI: ");
for_each_cpu(j)
for (j = 0; j < NR_CPUS; j++)
if (cpu_online(j))
seq_printf(p, "%10u ", cpu_pda[j].__nmi_count);
seq_putc(p, '\n');
#if CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
for_each_cpu(j)
for (j = 0; j < NR_CPUS; j++)
if (cpu_online(j))
seq_printf(p, "%10u ", cpu_pda[j].apic_timer_irqs);
seq_putc(p, '\n');
#endif
......
......@@ -31,6 +31,11 @@
#define DEBUGP(fmt...)
/* TODO this should be in vmlist, but we must fix get_vm_area first to
handle out of bounds entries properly.
Also need to fix /proc/kcore, /dev/kmem */
static struct vm_struct *mod_vmlist;
void module_free(struct module *mod, void *module_region)
{
struct vm_struct **prevp, *map;
......@@ -40,7 +45,7 @@ void module_free(struct module *mod, void *module_region)
if (!addr)
return;
write_lock(&vmlist_lock);
for (prevp = &vmlist ; (map = *prevp) ; prevp = &map->next) {
for (prevp = &mod_vmlist ; (map = *prevp) ; prevp = &map->next) {
if ((unsigned long)map->addr == addr) {
*prevp = map->next;
write_unlock(&vmlist_lock);
......@@ -81,7 +86,7 @@ void *module_alloc(unsigned long size)
write_lock(&vmlist_lock);
addr = (void *) MODULES_VADDR;
for (p = &vmlist; (tmp = *p); p = &tmp->next) {
for (p = &mod_vmlist; (tmp = *p); p = &tmp->next) {
void *next;
DEBUGP("vmlist %p %lu addr %p\n", tmp->addr, tmp->size, addr);
if (size + (unsigned long) addr + PAGE_SIZE < (unsigned long) tmp->addr)
......
......@@ -29,6 +29,7 @@
#include <asm/mpspec.h>
#include <asm/pgalloc.h>
#include <asm/io_apic.h>
#include <asm/proto.h>
/* Have we found an MP table */
int smp_found_config;
......@@ -83,7 +84,6 @@ extern int acpi_parse_ioapic (acpi_table_entry_header *header);
* Intel MP BIOS table parsing routines:
*/
#ifndef CONFIG_X86_VISWS_APIC
/*
* Checksum an MP configuration block.
*/
......@@ -582,9 +582,9 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
smp_found_config = 1;
printk("found SMP MP-table at %08lx\n",
virt_to_phys(mpf));
reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE);
if (mpf->mpf_physptr)
reserve_bootmem(mpf->mpf_physptr, PAGE_SIZE);
reserve_bootmem_generic(mpf->mpf_physptr, PAGE_SIZE);
mpf_found = mpf;
return 1;
}
......@@ -632,38 +632,14 @@ void __init find_intel_smp (void)
printk(KERN_WARNING "WARNING: MP table in the EBDA can be UNSAFE, contact linux-smp@vger.kernel.org if you experience SMP problems!\n");
}
#else
/*
* The Visual Workstation is Intel MP compliant in the hardware
* sense, but it doesn't have a BIOS(-configuration table).
* No problem for Linux.
*/
void __init find_visws_smp(void)
{
smp_found_config = 1;
phys_cpu_present_map |= 2; /* or in id 1 */
apic_version[1] |= 0x10; /* integrated APIC */
apic_version[0] |= 0x10;
mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
}
#endif
/*
* - Intel MP Configuration Table
* - or SGI Visual Workstation configuration
*/
void __init find_smp_config (void)
{
#ifdef CONFIG_X86_LOCAL_APIC
find_intel_smp();
#endif
#ifdef CONFIG_VISWS
find_visws_smp();
#endif
}
......
......@@ -22,6 +22,9 @@
*
* This driver uses /dev/cpu/%d/msr where %d is the minor number, and on
* an SMP box will direct the access to CPU %d.
RED-PEN: need to get power management for S3 restore
*/
#include <linux/module.h>
......
......@@ -24,6 +24,7 @@
#include <asm/mtrr.h>
#include <asm/mpspec.h>
#include <asm/nmi.h>
#include <asm/msr.h>
extern void default_do_nmi(struct pt_regs *);
......@@ -71,13 +72,14 @@ int __init check_nmi_watchdog (void)
printk(KERN_INFO "testing NMI watchdog ... ");
for_each_cpu(cpu) {
for (cpu = 0; cpu < NR_CPUS; cpu++)
counts[cpu] = cpu_pda[cpu].__nmi_count;
}
local_irq_enable();
mdelay((10*1000)/nmi_hz); // wait 10 ticks
for_each_cpu(cpu) {
for (cpu = 0; cpu < NR_CPUS; cpu++) {
if (!cpu_online(cpu))
continue;
if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) {
printk("CPU#%d: NMI appears to be stuck (%d)!\n",
cpu,
......@@ -173,7 +175,7 @@ static inline void nmi_pm_init(void) { }
* Original code written by Keith Owens.
*/
static void __pminit setup_k7_watchdog(void)
static void setup_k7_watchdog(void)
{
int i;
unsigned int evntsel;
......@@ -183,8 +185,10 @@ static void __pminit setup_k7_watchdog(void)
nmi_perfctr_msr = MSR_K7_PERFCTR0;
for(i = 0; i < 4; ++i) {
wrmsr(MSR_K7_EVNTSEL0+i, 0, 0);
wrmsr(MSR_K7_PERFCTR0+i, 0, 0);
/* Simulator may not support it */
if (checking_wrmsrl(MSR_K7_EVNTSEL0+i, 0UL))
return;
wrmsrl(MSR_K7_PERFCTR0+i, 0UL);
}
evntsel = K7_EVNTSEL_INT
......@@ -200,16 +204,12 @@ static void __pminit setup_k7_watchdog(void)
wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
}
void __pminit setup_apic_nmi_watchdog (void)
void setup_apic_nmi_watchdog (void)
{
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
if (boot_cpu_data.x86 < 6)
return;
/* Simics masquerades as AMD, but does not support
performance counters */
if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
return;
setup_k7_watchdog();
break;
default:
......
......@@ -366,12 +366,15 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
also reload when it has changed.
when prev process used 64bit base always reload
to avoid an information leak. */
if (unlikely((fsindex | next->fsindex) || prev->fs))
if (unlikely(fsindex | next->fsindex | prev->fs)) {
loadsegment(fs, next->fsindex);
/* check if the user changed the selector
if yes clear 64bit base. */
if (unlikely(fsindex != prev->fsindex))
/* check if the user used a selector != 0
* if yes clear 64bit base, since overloaded base
* is always mapped to the Null selector
*/
if (fsindex)
prev->fs = 0;
}
/* when next process has a 64bit base use it */
if (next->fs)
wrmsrl(MSR_FS_BASE, next->fs);
......@@ -380,10 +383,11 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
unsigned gsindex;
asm volatile("movl %%gs,%0" : "=g" (gsindex));
if (unlikely((gsindex | next->gsindex) || prev->gs))
if (unlikely(gsindex | next->gsindex | prev->gs)) {
load_gs_index(next->gsindex);
if (unlikely(gsindex != prev->gsindex))
if (gsindex)
prev->gs = 0;
}
if (next->gs)
wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
prev->gsindex = gsindex;
......@@ -537,29 +541,23 @@ int sys_arch_prctl(int code, unsigned long addr)
switch (code) {
case ARCH_SET_GS:
#if 1
/* For now. We still have one unsolved bug in long gs base context
switch handling. */
return -EINVAL;
#else
if (addr >= TASK_SIZE)
return -EPERM;
get_cpu();
load_gs_index(__USER_LONGBASE);
current->thread.gsindex = __USER_LONGBASE;
load_gs_index(0);
current->thread.gsindex = 0;
current->thread.gs = addr;
ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
put_cpu();
break;
#endif
case ARCH_SET_FS:
/* Not strictly needed for fs, but do it for symmetry
with gs */
if (addr >= TASK_SIZE)
return -EPERM;
get_cpu();
asm volatile("movl %0,%%fs" :: "r" (__USER_LONGBASE));
current->thread.fsindex = __USER_LONGBASE;
asm volatile("movl %0,%%fs" :: "r" (0));
current->thread.fsindex = 0;
current->thread.fs = addr;
ret = checking_wrmsrl(MSR_FS_BASE, addr);
put_cpu();
......
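/*
 * Aside: the fs/gs base handling above is what userspace reaches via
 * arch_prctl(2). Below is a hedged usage sketch, not part of this patch:
 * ARCH_SET_FS and ARCH_GET_FS come from asm/prctl.h, and glibc has no
 * wrapper so the raw syscall is used. (Note the hunk above disables
 * ARCH_SET_GS for now, so only the %fs side can be exercised.)
 */
#include <asm/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

static unsigned long tls_area[64];

int main(void)
{
	unsigned long base = 0;
	/* after this, %fs-relative addressing in this thread hits tls_area */
	if (syscall(SYS_arch_prctl, ARCH_SET_FS, (unsigned long)tls_area))
		return 1;
	syscall(SYS_arch_prctl, ARCH_GET_FS, &base);
	printf("fs base = %#lx\n", base);
	return 0;
}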
......@@ -38,6 +38,7 @@
#include <linux/seq_file.h>
#include <linux/root_dev.h>
#include <linux/pci.h>
#include <linux/acpi.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/system.h>
......@@ -53,6 +54,8 @@
#include <asm/smp.h>
#include <asm/proto.h>
#define Dprintk(x...) printk(x)
/*
* Machine setup..
*/
......@@ -236,6 +239,8 @@ void __init setup_arch(char **cmdline_p)
{
int i;
Dprintk("setup_arch\n");
ROOT_DEV = ORIG_ROOT_DEV;
drive_info = DRIVE_INFO;
screen_info = SCREEN_INFO;
......@@ -263,34 +268,34 @@ void __init setup_arch(char **cmdline_p)
parse_cmdline_early(cmdline_p);
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
#define MAXMEM (120UL * 1024 * 1024 * 1024 * 1024) /* 120TB */
#define MAXMEM_PFN PFN_DOWN(MAXMEM)
#define MAX_NONPAE_PFN (1 << 20)
/*
* partially used pages are not usable - thus
* we are rounding upwards:
*/
start_pfn = PFN_UP(__pa_symbol(&_end));
e820_end_of_ram();
end_pfn = e820_end_of_ram();
init_memory_mapping();
#ifdef CONFIG_DISCONTIGMEM
numa_initmem_init(0, end_pfn);
#else
contig_initmem_init();
#endif
/* Reserve direct mapping */
reserve_bootmem_generic(table_start << PAGE_SHIFT,
(table_end - table_start) << PAGE_SHIFT);
/* reserve kernel */
reserve_bootmem(HIGH_MEMORY, PFN_PHYS(start_pfn) - HIGH_MEMORY);
unsigned long kernel_end;
kernel_end = round_up(__pa_symbol(&_end),PAGE_SIZE);
reserve_bootmem_generic(HIGH_MEMORY, kernel_end - HIGH_MEMORY);
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
* enabling clean reboots, SMP operation, laptop functions.
*/
reserve_bootmem(0, PAGE_SIZE);
reserve_bootmem_generic(0, PAGE_SIZE);
#ifdef CONFIG_SMP
/*
......@@ -298,8 +303,12 @@ void __init setup_arch(char **cmdline_p)
* FIXME: Don't need the extra page at 4K, but need to fix
* trampoline before removing it. (see the GDT stuff)
*/
reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
/* Reserve SMP trampoline */
reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
#endif
#ifdef CONFIG_ACPI_SLEEP
/*
* Reserve low memory region for sleep support.
......@@ -315,7 +324,7 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_BLK_DEV_INITRD
if (LOADER_TYPE && INITRD_START) {
if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
reserve_bootmem(INITRD_START, INITRD_SIZE);
reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
initrd_start =
INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
initrd_end = initrd_start+INITRD_SIZE;
......@@ -330,14 +339,6 @@ void __init setup_arch(char **cmdline_p)
}
#endif
/*
* NOTE: before this point _nobody_ is allowed to allocate
* any memory using the bootmem allocator.
*/
#ifdef CONFIG_SMP
smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
#endif
paging_init();
#ifdef CONFIG_ACPI_BOOT
/*
......@@ -347,7 +348,7 @@ void __init setup_arch(char **cmdline_p)
* of MADT).
*/
if (!acpi_disabled)
acpi_boot_init(*cmdline_p);
acpi_boot_init();
#endif
#ifdef CONFIG_X86_LOCAL_APIC
/*
......
/*
* X86-64 specific CPU setup.
* Copyright (C) 1995 Linus Torvalds
* Copyright 2001, 2002 SuSE Labs / Andi Kleen.
* Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
* See setup.c for older changelog.
* $Id: setup64.c,v 1.12 2002/03/21 10:09:17 ak Exp $
*/
......@@ -90,6 +90,17 @@ void pda_init(int cpu)
pml4_t *level4;
struct x8664_pda *pda = &cpu_pda[cpu];
/* Setup up data that may be needed in __get_free_pages early */
asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
pda->me = pda;
pda->cpunumber = cpu;
pda->irqcount = -1;
pda->cpudata_offset = 0;
pda->kernelstack =
(unsigned long)current_thread_info() - PDA_STACKOFFSET + THREAD_SIZE;
if (cpu == 0) {
/* others are initialized in smpboot.c */
pda->pcurrent = &init_task;
......@@ -112,18 +123,8 @@ void pda_init(int cpu)
asm volatile("movq %0,%%cr3" :: "r" (__pa(level4)));
pda->irqstackptr += IRQSTACKSIZE-64;
pda->cpunumber = cpu;
pda->irqcount = -1;
pda->kernelstack =
(unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE;
pda->me = pda;
pda->cpudata_offset = 0;
pda->active_mm = &init_mm;
pda->mmu_state = 0;
asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
}
#define EXCEPTION_STK_ORDER 0 /* >= N_EXCEPTION_STACKS*EXCEPTION_STKSZ */
......@@ -150,10 +151,10 @@ void __init cpu_init (void)
/* CPU 0 is initialised in head64.c */
if (cpu != 0) {
pda_init(cpu);
estacks = (char *)__get_free_pages(GFP_ATOMIC, 0);
if (!estacks)
panic("Can't allocate exception stacks for CPU %d\n",cpu);
pda_init(cpu);
} else
estacks = boot_exception_stacks;
......
......@@ -353,7 +353,7 @@ static void
handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset,
struct pt_regs * regs)
{
struct k_sigaction *ka = &current->sig->action[sig-1];
struct k_sigaction *ka = &current->sighand->action[sig-1];
#if DEBUG_SIG
printk("handle_signal pid:%d sig:%lu rip:%lx rsp:%lx regs=%p\n", current->pid, sig,
......
......@@ -3,6 +3,7 @@
*
* (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
* (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
* (c) 2002,2003 Andi Kleen, SuSE Labs.
*
* This code is released under the GNU General Public License version 2 or
* later.
......@@ -491,3 +492,24 @@ asmlinkage void smp_call_function_interrupt(void)
}
}
/* Slow. Should be only used for debugging. */
int slow_smp_processor_id(void)
{
int stack_location;
unsigned long sp = (unsigned long)&stack_location;
int cpu;
for (cpu = 0; cpu < NR_CPUS; cpu++) {
unsigned long estack;
/* does sp fall on this CPU's interrupt stack? */
if (sp >= (u64)cpu_pda[cpu].irqstackptr - IRQSTACKSIZE &&
sp <= (u64)cpu_pda[cpu].irqstackptr)
return cpu;
/* or on one of its exception stacks? */
estack = init_tss[cpu].ist[0] - EXCEPTION_STKSZ;
if (sp >= estack && sp <= estack+(1<<(PAGE_SHIFT+EXCEPTION_STK_ORDER)))
return cpu;
}
return stack_smp_processor_id();
}
......@@ -51,13 +51,10 @@
#include <asm/kdebug.h>
#include <asm/tlbflush.h>
/* Bitmask of currently online CPUs */
unsigned long cpu_online_map;
extern int disable_apic;
/* which CPU (physical APIC ID) maps to which logical CPU number */
volatile int x86_apicid_to_cpu[NR_CPUS];
/* which logical CPU number maps to which CPU (physical APIC ID) */
volatile int x86_cpu_to_apicid[NR_CPUS];
/* Bitmask of currently online CPUs */
unsigned long cpu_online_map = 1;
static volatile unsigned long cpu_callin_map;
volatile unsigned long cpu_callout_map;
......@@ -75,7 +72,6 @@ int smp_threads_ready;
extern unsigned char trampoline_data [];
extern unsigned char trampoline_end [];
static unsigned char *trampoline_base;
/*
* Currently trivial. Write the real->protected mode
......@@ -85,25 +81,11 @@ static unsigned char *trampoline_base;
static unsigned long __init setup_trampoline(void)
{
void *tramp = __va(SMP_TRAMPOLINE_BASE);
extern volatile __u32 tramp_gdt_ptr;
tramp_gdt_ptr = __pa_symbol(&cpu_gdt_table);
memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
return virt_to_phys(trampoline_base);
}
/*
* We are called very early to get the low memory for the
* SMP bootup trampoline page.
*/
void __init smp_alloc_memory(void)
{
trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
/*
* Has to be in very low memory so we can execute
* real-mode AP code.
*/
if (__pa(trampoline_base) >= 0x9F000)
BUG();
memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
return virt_to_phys(tramp);
}
/*
......@@ -174,6 +156,7 @@ static void __init synchronize_tsc_bp (void)
*/
atomic_inc(&tsc_count_start);
sync_core();
rdtscll(tsc_values[smp_processor_id()]);
/*
* We clear the TSC in the last loop:
......@@ -245,6 +228,7 @@ static void __init synchronize_tsc_ap (void)
atomic_inc(&tsc_count_start);
while (atomic_read(&tsc_count_start) != num_booting_cpus()) mb();
sync_core();
rdtscll(tsc_values[smp_processor_id()]);
if (i == NR_LOOPS-1)
write_tsc(0, 0);
......@@ -369,6 +353,9 @@ int __init start_secondary(void *unused)
cpu_init();
smp_callin();
/* otherwise gcc will move up the smp_processor_id before the cpu_init */
barrier();
Dprintk("cpu %d: waiting for commence\n", smp_processor_id());
while (!test_bit(smp_processor_id(), &smp_commenced_mask))
rep_nop();
......@@ -620,8 +607,6 @@ static void __init do_boot_cpu (int apicid)
*/
init_idle(idle,cpu);
x86_cpu_to_apicid[cpu] = apicid;
x86_apicid_to_cpu[apicid] = cpu;
idle->thread.rip = (unsigned long)start_secondary;
// idle->thread.rsp = (unsigned long)idle->thread_info + THREAD_SIZE - 512;
......@@ -713,8 +698,6 @@ static void __init do_boot_cpu (int apicid)
}
}
if (boot_error) {
x86_cpu_to_apicid[cpu] = -1;
x86_apicid_to_cpu[apicid] = -1;
clear_bit(cpu, &cpu_callout_map); /* was set here (do_boot_cpu()) */
clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
cpucount--;
......@@ -776,14 +759,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
{
int apicid, cpu;
/*
* Initialize the logical to physical CPU number mapping
*/
for (apicid = 0; apicid < NR_CPUS; apicid++) {
x86_apicid_to_cpu[apicid] = -1;
}
/*
* Setup boot CPU information
*/
......@@ -791,8 +766,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
printk("CPU%d: ", 0);
print_cpu_info(&cpu_data[0]);
x86_apicid_to_cpu[boot_cpu_id] = 0;
x86_cpu_to_apicid[0] = boot_cpu_id;
current_thread_info()->cpu = 0;
smp_tune_scheduling();
......@@ -837,6 +810,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
io_apic_irqs = 0;
cpu_online_map = phys_cpu_present_map = 1;
phys_cpu_present_map = 1;
disable_apic = 1;
return;
}
......@@ -851,6 +825,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
io_apic_irqs = 0;
cpu_online_map = phys_cpu_present_map = 1;
phys_cpu_present_map = 1;
disable_apic = 1;
return;
}
......@@ -878,13 +853,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
continue;
do_boot_cpu(apicid);
/*
* Make sure we unmap all failed CPUs
*/
if ((x86_apicid_to_cpu[apicid] == -1) &&
(phys_cpu_present_map & (1 << apicid)))
printk("phys CPU #%d not responding - cannot use it.\n",apicid);
}
/*
......
......@@ -55,7 +55,6 @@ long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigne
if (!file)
goto out;
}
down_write(&current->mm->mmap_sem);
error = do_mmap_pgoff(file, addr, len, prot, flags, off >> PAGE_SHIFT);
up_write(&current->mm->mmap_sem);
......
......@@ -9,6 +9,7 @@
* Copyright (c) 1996 Ingo Molnar
* Copyright (c) 1998 Andrea Arcangeli
* Copyright (c) 2002 Vojtech Pavlik
* Copyright (c) 2003 Andi Kleen
*
*/
......@@ -25,9 +26,14 @@
#include <linux/bcd.h>
#include <asm/vsyscall.h>
#include <asm/timex.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/apic.h>
#endif
u64 jiffies_64;
extern int using_apic_timer;
spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED;
extern int using_apic_timer;
......@@ -56,12 +62,10 @@ struct timezone __sys_tz __section_sys_tz;
* together by xtime_lock.
*/
static spinlock_t time_offset_lock = SPIN_LOCK_UNLOCKED;
static unsigned long timeoffset = 0;
inline unsigned int do_gettimeoffset(void)
{
unsigned long t;
sync_core();
rdtscll(t);
return (t - hpet.last_tsc) * (1000000L / HZ) / hpet.ticks + hpet.offset;
}
......@@ -74,10 +78,9 @@ inline unsigned int do_gettimeoffset(void)
void do_gettimeofday(struct timeval *tv)
{
unsigned long flags, t, seq;
unsigned long seq, t;
unsigned int sec, usec;
spin_lock_irqsave(&time_offset_lock, flags);
do {
seq = read_seqbegin(&xtime_lock);
......@@ -85,11 +88,9 @@ void do_gettimeofday(struct timeval *tv)
usec = xtime.tv_nsec / 1000;
t = (jiffies - wall_jiffies) * (1000000L / HZ) + do_gettimeoffset();
if (t > timeoffset) timeoffset = t;
usec += timeoffset;
usec += t;
} while (read_seqretry(&xtime_lock, seq));
spin_unlock_irqrestore(&time_offset_lock, flags);
tv->tv_sec = sec + usec / 1000000;
tv->tv_usec = usec % 1000000;
......@@ -104,7 +105,6 @@ void do_gettimeofday(struct timeval *tv)
void do_settimeofday(struct timeval *tv)
{
write_seqlock_irq(&xtime_lock);
vxtime_lock();
tv->tv_usec -= do_gettimeoffset() +
(jiffies - wall_jiffies) * tick_usec;
......@@ -116,7 +116,6 @@ void do_settimeofday(struct timeval *tv)
xtime.tv_sec = tv->tv_sec;
xtime.tv_nsec = (tv->tv_usec * 1000);
vxtime_unlock();
time_adjust = 0; /* stop active adjtime() */
time_status |= STA_UNSYNC;
......@@ -207,11 +206,11 @@ static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
*/
write_seqlock(&xtime_lock);
vxtime_lock();
{
unsigned long t;
sync_core();
rdtscll(t);
hpet.offset = (t - hpet.last_tsc) * (1000000L / HZ) / hpet.ticks + hpet.offset - 1000000L / HZ;
if (hpet.offset >= 1000000L / HZ)
......@@ -219,7 +218,6 @@ static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
hpet.ticks = min_t(long, max_t(long, (t - hpet.last_tsc) * (1000000L / HZ) / (1000000L / HZ - hpet.offset),
cpu_khz * 1000/HZ * 15 / 16), cpu_khz * 1000/HZ * 16 / 15);
hpet.last_tsc = t;
timeoffset = 0;
}
/*
......@@ -255,7 +253,6 @@ static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
rtc_update = xtime.tv_sec + 660;
}
vxtime_unlock();
write_sequnlock(&xtime_lock);
}
......@@ -348,8 +345,9 @@ static unsigned int __init pit_calibrate_tsc(void)
outb((1193182 / (1000 / 50)) & 0xff, 0x42);
outb((1193182 / (1000 / 50)) >> 8, 0x42);
rdtscll(start);
sync_core();
while ((inb(0x61) & 0x20) == 0);
sync_core();
rdtscll(end);
......@@ -382,12 +380,12 @@ void __init time_init(void)
pit_init();
printk(KERN_INFO "time.c: Using 1.1931816 MHz PIT timer.\n");
setup_irq(0, &irq0);
cpu_khz = pit_calibrate_tsc();
printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
cpu_khz / 1000, cpu_khz % 1000);
hpet.ticks = cpu_khz * (1000 / HZ);
rdtscll(hpet.last_tsc);
setup_irq(0, &irq0);
}
__setup("report_lost_ticks", time_setup);
......@@ -77,6 +77,12 @@ extern int exception_trace;
struct notifier_block *die_chain;
static inline void conditional_sti(struct pt_regs *regs)
{
if (regs->eflags & X86_EFLAGS_IF)
local_irq_enable();
}
static int kstack_depth_to_print = 10;
#ifdef CONFIG_KALLSYMS
......@@ -128,8 +134,7 @@ void show_trace(unsigned long *stack)
{
unsigned long addr;
unsigned long *irqstack, *irqstack_end, *estack_end;
/* FIXME: should read the cpuid from the APIC; to still work with bogus %gs */
const int cpu = smp_processor_id();
const int cpu = safe_smp_processor_id();
int i;
printk("\nCall Trace:");
......@@ -210,7 +215,7 @@ void show_stack(unsigned long * rsp)
{
unsigned long *stack;
int i;
const int cpu = smp_processor_id();
const int cpu = safe_smp_processor_id();
unsigned long *irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr);
unsigned long *irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE);
......@@ -252,12 +257,7 @@ void show_registers(struct pt_regs *regs)
int i;
int in_kernel = (regs->cs & 3) == 0;
unsigned long rsp;
#ifdef CONFIG_SMP
/* For SMP should get the APIC id here, just to protect against corrupted GS */
const int cpu = smp_processor_id();
#else
const int cpu = 0;
#endif
const int cpu = safe_smp_processor_id();
struct task_struct *cur = cpu_pda[cpu].pcurrent;
rsp = regs->rsp;
......@@ -330,7 +330,7 @@ void die(const char * str, struct pt_regs * regs, long err)
bust_spinlocks(1);
handle_BUG(regs);
printk("%s: %04lx\n", str, err & 0xffff);
cpu = smp_processor_id();
cpu = safe_smp_processor_id();
/* racy, but better than risking deadlock. */
local_irq_disable();
if (!spin_trylock(&die_lock)) {
......@@ -365,10 +365,12 @@ static inline unsigned long get_cr2(void)
static void do_trap(int trapnr, int signr, char *str,
struct pt_regs * regs, long error_code, siginfo_t *info)
{
conditional_sti(regs);
#ifdef CONFIG_CHECKING
{
unsigned long gs;
struct x8664_pda *pda = cpu_pda + stack_smp_processor_id();
struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
rdmsrl(MSR_GS_BASE, gs);
if (gs != (unsigned long)pda) {
wrmsrl(MSR_GS_BASE, pda);
......@@ -454,10 +456,12 @@ extern void dump_pagetable(unsigned long);
asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
{
conditional_sti(regs);
#ifdef CONFIG_CHECKING
{
unsigned long gs;
struct x8664_pda *pda = cpu_pda + hard_smp_processor_id();
struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
rdmsrl(MSR_GS_BASE, gs);
if (gs != (unsigned long)pda) {
wrmsrl(MSR_GS_BASE, pda);
......@@ -565,7 +569,7 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code)
#ifdef CONFIG_CHECKING
{
unsigned long gs;
struct x8664_pda *pda = cpu_pda + stack_smp_processor_id();
struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
rdmsrl(MSR_GS_BASE, gs);
if (gs != (unsigned long)pda) {
wrmsrl(MSR_GS_BASE, pda);
......@@ -576,6 +580,8 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code)
asm("movq %%db6,%0" : "=r" (condition));
conditional_sti(regs);
if (notify_die(DIE_DEBUG, "debug", regs, error_code) == NOTIFY_BAD)
return;
......@@ -636,7 +642,6 @@ void math_error(void *rip)
struct task_struct * task;
siginfo_t info;
unsigned short cwd, swd;
/*
* Save the info for the exception handler and clear the error.
*/
......@@ -688,6 +693,7 @@ void math_error(void *rip)
asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code)
{
conditional_sti(regs);
math_error((void *)regs->rip);
}
......@@ -747,6 +753,7 @@ static inline void simd_math_error(void *rip)
asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs,
long error_code)
{
conditional_sti(regs);
simd_math_error((void *)regs->rip);
}
......
......@@ -2,6 +2,7 @@
* linux/arch/x86_64/kernel/vsyscall.c
*
* Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
* Copyright 2003 Andi Kleen, SuSE Labs.
*
* Thanks to hpa@transmeta.com for some useful hint.
* Special thanks to Ingo Molnar for his early experience with
......@@ -12,7 +13,8 @@
* vsyscalls. One vsyscall can reserve more than 1 slot to avoid
* jumping out of line if necessary.
*
* $Id: vsyscall.c,v 1.9 2002/03/21 13:42:58 ak Exp $
* Note: the concept clashes with user mode linux. If you use UML just
* set the kernel.vsyscall sysctl to 0.
*/
/*
......@@ -29,6 +31,9 @@
* broken programs will segfault and there's no security risk until we choose to
* fix it.
*
* Add HPET support (port from 2.4). Still needed?
* Nop out vsyscall syscall to avoid anchor for buffer overflows when sysctl off.
*
* These are not urgent things that we need to address only before shipping the first
* production binary kernels.
*/
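/*
 * Aside: a vsyscall is reached from userspace as a plain call to a fixed
 * address; no kernel entry happens while the sysctl is on. A hedged sketch,
 * not part of this patch; the 0xffffffffff600000 base is an assumption taken
 * from the x86-64 fixmap layout of this era:
 */
#include <sys/time.h>

#define VSYSCALL_BASE 0xffffffffff600000UL	/* slot 0: vgettimeofday */

static inline int call_vgettimeofday(struct timeval *tv, struct timezone *tz)
{
	int (*vgtod)(struct timeval *, struct timezone *) =
		(int (*)(struct timeval *, struct timezone *))VSYSCALL_BASE;
	/* ordinary call; the stub itself falls back to the real syscall
	   when __sysctl_vsyscall is 0 */
	return vgtod(tv, tz);
}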
......@@ -37,6 +42,7 @@
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/seqlock.h>
#include <asm/vsyscall.h>
#include <asm/pgtable.h>
......@@ -44,19 +50,13 @@
#include <asm/fixmap.h>
#include <asm/errno.h>
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
#define NO_VSYSCALL 1
int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
#ifdef NO_VSYSCALL
#include <asm/unistd.h>
static int errno __section_vxtime_sequence;
static inline _syscall2(int,gettimeofday,struct timeval *,tv,struct timezone *,tz)
#else
static inline void timeval_normalize(struct timeval * tv)
{
time_t __sec;
......@@ -69,63 +69,60 @@ static inline void timeval_normalize(struct timeval * tv)
}
}
long __vxtime_sequence[2] __section_vxtime_sequence;
static inline void do_vgettimeofday(struct timeval * tv)
{
long sequence, t;
unsigned long sec, usec;
do {
sequence = __vxtime_sequence[1];
rmb();
sequence = read_seqbegin(&__xtime_lock);
sync_core();
rdtscll(t);
sec = __xtime.tv_sec;
usec = __xtime.tv_usec +
usec = (__xtime.tv_nsec * 1000) +
(__jiffies - __wall_jiffies) * (1000000 / HZ) +
(t - __hpet.last_tsc) * (1000000 / HZ) / __hpet.ticks + __hpet.offset;
rmb();
} while (sequence != __vxtime_sequence[0]);
} while (read_seqretry(&__xtime_lock, sequence));
tv->tv_sec = sec + usec / 1000000;
tv->tv_usec = usec % 1000000;
}
/* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
static inline void do_get_tz(struct timezone * tz)
{
long sequence;
do {
sequence = __vxtime_sequence[1];
rmb();
*tz = __sys_tz;
}
rmb();
} while (sequence != __vxtime_sequence[0]);
static inline int gettimeofday(struct timeval *tv, struct timezone *tz)
{
int ret;
asm volatile("syscall"
: "=a" (ret)
: "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber );
return ret;
}
#endif
static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
{
#ifdef NO_VSYSCALL
if (unlikely(!__sysctl_vsyscall))
return gettimeofday(tv,tz);
#else
if (tv)
do_vgettimeofday(tv);
if (tz)
do_get_tz(tz);
return 0;
#endif
}
static time_t __vsyscall(1) vtime(time_t * t)
{
struct timeval tv;
vgettimeofday(&tv,NULL);
if (unlikely(!__sysctl_vsyscall))
gettimeofday(&tv, NULL);
else
do_vgettimeofday(&tv);
if (t)
*t = tv.tv_sec;
return tv.tv_sec;
......@@ -139,12 +136,13 @@ static long __vsyscall(2) venosys_0(void)
static long __vsyscall(3) venosys_1(void)
{
return -ENOSYS;
}
static void __init map_vsyscall(void)
{
extern char __vsyscall_0;
unsigned long physaddr_page0 = (unsigned long) &__vsyscall_0 - __START_KERNEL_map;
unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
}
......
/*
* ACPI S3 entry/exit handling.
*
* Notes:
* Relies on kernel being loaded below 4GB.
* Needs restore_low_mappings called before.
*
* Copyright 2003 by Andi Kleen, SuSE Labs.
*
* Long mode entry loosely based on example code in chapter 14 of the x86-64 system
* programmer's manual.
*
* Notebook:
FIXME need to interface with suspend.c properly. do_magic. check i386. rename to suspend64.S
Need to fix vgacon,mtrr,bluesmoke to do resume
Interrupts should be off until the io-apic code has reinited the APIC.
Need support for that in the pm framework or a special hack?
SMP support is non-existent. Need to somehow restart the other CPUs again.
If CPU hotplug was working it could be used. Save/Restore needs to run on the same CPU.
Should check magic like i386 code
suspend code copies something. check what it is.
*/
#include <linux/linkage.h>
#include <asm/msr.h>
#include <asm/segment.h>
#include <asm/page.h>
#define O(x) (x-acpi_wakeup)
.text
.code16
ENTRY(acpi_wakeup)
/* 16bit real mode entered from ACPI BIOS */
/* The machine is just through BIOS setup after power down and everything set up
by Linux needs to be restored. */
/* The code here needs to be position independent or manually relocated,
because it is copied to a <1MB page for real mode execution */
/* A20 enabled (according to ACPI spec) */
/* cs = acpi_wakeup >> 4 ; eip = acpi_wakeup & 0xF */
movw %cs,%ax
movw %ax,%ds /* make %ds point to acpi_wakeup */
movw %ax,%ss
movw $O(wakeup_stack),%sp /* setup stack */
pushl $0
popfl /* clear EFLAGS */
lgdt %ds:O(pGDT) /* load kernel GDT */
movl $0x1,%eax /* enable protected mode */
movl %eax,%cr0
movl %ds:O(wakeup_page_table),%edi
ljmpl $__KERNEL16_CS,$0 /* -> s3_prot16 (filled in earlier by caller) */
/* patched by s3_restore_state below */
pGDT:
.short 0
.quad 0
.align 4
.globl wakeup_page_table
wakeup_page_table:
.long 0
.align 8
wakeup_stack:
.fill 128,1,0
.globl acpi_wakeup_end
acpi_wakeup_end:
/* end of real mode trampoline */
/* pointed to by __KERNEL16_CS:0 */
.code16
ENTRY(s3_prot16)
/* Now in 16bit protected mode, still no paging, stack/data segments invalid */
/* Prepare everything for 64bit paging, but still keep it turned off */
movl %cr4,%eax
bts $5,%eax /* set PAE bit */
movl %eax,%cr4
movl %edi,%cr3 /* load kernel page table */
movl $0x80000001,%eax
cpuid /* no execute supported ? */
movl %edx,%esi
movl $MSR_EFER,%ecx
rdmsr
bts $8,%eax /* long mode */
bt $20,%esi /* NX supported ? */
jnc 1f
bts $_EFER_NX,%eax /* enable no-execute */
1:
wrmsr /* set temporary efer - real one is restored a bit later */
movl %cr0,%eax
bts $31,%eax /* paging */
movl %eax,%cr0
/* running in identity mapping now */
/* go to 64bit code segment */
ljmpl $__KERNEL_CS,$s3_restore_state-__START_KERNEL_map
.code64
.macro SAVEMSR msr,target
movl $\msr,%ecx
rdmsr
shlq $32,%rdx
orq %rax,%rdx
movq %rdx,\target(%rip)
.endm
.macro RESTMSR msr,src
movl $\msr,%ecx
movq \src(%rip),%rax
movq %rax,%rdx
shrq $32,%rdx
wrmsr
.endm
.macro SAVECTL reg
movq %\reg,%rax
movq %rax,saved_\reg(%rip)
.endm
.macro RESTCTL reg
movq saved_\reg(%rip),%rax
movq %rax,%\reg
.endm
/* Running in identity mapping, long mode */
s3_restore_state_low:
movq $s3_restore_state,%rax
jmpq *%rax
/* Running in real kernel mapping now */
s3_restore_state:
xorl %eax,%eax
movl %eax,%ds
movq saved_rsp(%rip),%rsp
movw saved_ss(%rip),%ss
movw saved_fs(%rip),%fs
movw saved_gs(%rip),%gs
movw saved_es(%rip),%es
movw saved_ds(%rip),%ds
lidt saved_idt
ltr saved_tr
lldt saved_ldt
/* gdt is already loaded */
RESTCTL cr0
RESTCTL cr4
/* cr3 is already loaded */
RESTMSR MSR_EFER,saved_efer
RESTMSR MSR_LSTAR,saved_lstar
RESTMSR MSR_CSTAR,saved_cstar
RESTMSR MSR_FS_BASE,saved_fs_base
RESTMSR MSR_GS_BASE,saved_gs_base
RESTMSR MSR_KERNEL_GS_BASE,saved_kernel_gs_base
RESTMSR MSR_SYSCALL_MASK,saved_syscall_mask
fxrstor fpustate(%rip)
RESTCTL dr0
RESTCTL dr1
RESTCTL dr2
RESTCTL dr3
RESTCTL dr6
RESTCTL dr7
movq saved_rflags(%rip),%rax
pushq %rax
popfq
movq saved_rbp(%rip),%rbp
movq saved_rbx(%rip),%rbx
movq saved_r12(%rip),%r12
movq saved_r13(%rip),%r13
movq saved_r14(%rip),%r14
movq saved_r15(%rip),%r15
ret
ENTRY(acpi_prepare_wakeup)
sgdt saved_gdt
/* copy gdt descr and page table to low level wakeup code so that it can
reload them early. */
movq acpi_wakeup_address(%rip),%rax
movw saved_gdt+8(%rip),%cx
movw %cx,O(pGDT)+8(%rax)
movq saved_gdt(%rip),%rcx
movq %rcx,O(pGDT)(%rax)
movq %cr3,%rdi
movl %edi,O(wakeup_page_table)(%rax)
ret
/* Save CPU state. */
/* Everything saved here needs to be restored above. */
ENTRY(do_suspend_lowlevel)
testl %edi,%edi
jnz s3_restore_state
SAVECTL cr0
SAVECTL cr4
SAVECTL cr3
str saved_tr
sidt saved_idt
sgdt saved_gdt
sldt saved_ldt
SAVEMSR MSR_EFER,saved_efer
SAVEMSR MSR_LSTAR,saved_lstar
SAVEMSR MSR_CSTAR,saved_cstar
SAVEMSR MSR_FS_BASE,saved_fs_base
SAVEMSR MSR_GS_BASE,saved_gs_base
SAVEMSR MSR_KERNEL_GS_BASE,saved_kernel_gs_base
SAVEMSR MSR_SYSCALL_MASK,saved_syscall_mask
movw %ds,saved_ds(%rip)
movw %es,saved_es(%rip)
movw %fs,saved_fs(%rip)
movw %gs,saved_gs(%rip)
movw %ss,saved_ss(%rip)
movq %rsp,saved_rsp(%rip)
pushfq
popq %rax
movq %rax,saved_rflags(%rip)
SAVECTL dr0
SAVECTL dr1
SAVECTL dr2
SAVECTL dr3
SAVECTL dr6
SAVECTL dr7
fxsave fpustate(%rip)
/* finally save callee saved registers */
movq %rbp,saved_rbp(%rip)
movq %rbx,saved_rbx(%rip)
movq %r12,saved_r12(%rip)
movq %r13,saved_r13(%rip)
movq %r14,saved_r14(%rip)
movq %r15,saved_r15(%rip)
movq $3,%rdi
call acpi_enter_sleep_state
ret /* should not happen */
.data
.align 8
saved_efer: .quad 0
saved_lstar: .quad 0
saved_cstar: .quad 0
saved_cr4: .quad 0
saved_cr3: .quad 0
saved_cr0: .quad 0
saved_rbp: .quad 0
saved_rbx: .quad 0
saved_rsp: .quad 0
saved_r12: .quad 0
saved_r13: .quad 0
saved_r14: .quad 0
saved_r15: .quad 0
saved_rflags: .quad 0
saved_gs_base: .quad 0
saved_fs_base: .quad 0
saved_kernel_gs_base: .quad 0
saved_syscall_mask: .quad 0
saved_dr0: .quad 0
saved_dr1: .quad 0
saved_dr2: .quad 0
saved_dr3: .quad 0
saved_dr6: .quad 0
saved_dr7: .quad 0
saved_ds: .short 0
saved_fs: .short 0
saved_gs: .short 0
saved_es: .short 0
saved_ss: .short 0
saved_idt: .short 0
.quad 0
saved_ldt: .short 0
saved_gdt: .short 0
.quad 0
saved_tr: .short 0
.align 16
fpustate: .fill 512,1,0
#
# Makefile for the linux i386-specific parts of the memory manager.
# Makefile for the linux x86_64-specific parts of the memory manager.
#
obj-y := init.o fault.o ioremap.o extable.o pageattr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_DISCONTIGMEM) += numa.o
obj-$(CONFIG_K8_NUMA) += k8topology.o
......@@ -121,7 +121,10 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
/* get the address */
__asm__("movq %%cr2,%0":"=r" (address));
if (page_fault_trace)
if (likely(regs->eflags & X86_EFLAGS_IF))
local_irq_enable();
if (unlikely(page_fault_trace))
printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
......@@ -139,7 +142,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
if (in_atomic() || !mm)
if (unlikely(in_atomic() || !mm))
goto no_context;
again:
......@@ -148,7 +151,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
vma = find_vma(mm, address);
if (!vma)
goto bad_area;
if (vma->vm_start <= address)
if (likely(vma->vm_start <= address))
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
......@@ -222,7 +225,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
return;
}
#endif
printk("%s[%d] segfault at rip:%lx rsp:%lx adr:%lx err:%lx\n",
printk(KERN_INFO
"%s[%d] segfault at rip:%lx rsp:%lx adr:%lx err:%lx\n",
tsk->comm, tsk->pid, regs->rip, regs->rsp, address,
error_code);
......
......@@ -162,6 +162,37 @@ follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
return i;
}
struct page *
follow_huge_addr(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address, int write)
{
return NULL;
}
struct vm_area_struct *hugepage_vma(struct mm_struct *mm, unsigned long addr)
{
return NULL;
}
int pmd_huge(pmd_t pmd)
{
return !!(pmd_val(pmd) & _PAGE_PSE);
}
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
pmd_t *pmd, int write)
{
struct page *page;
page = pte_page(*(pte_t *)pmd);
if (page) {
page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT);
get_page(page);
}
return page;
}
void free_huge_page(struct page *page)
{
BUG_ON(page_count(page));
......@@ -193,8 +224,6 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsig
BUG_ON(start & (HPAGE_SIZE - 1));
BUG_ON(end & (HPAGE_SIZE - 1));
spin_lock(&htlbpage_lock);
spin_unlock(&htlbpage_lock);
for (address = start; address < end; address += HPAGE_SIZE) {
pte = huge_pte_offset(mm, address);
page = pte_page(*pte);
......@@ -216,7 +245,7 @@ void zap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigne
int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
{
struct mm_struct *mm = current->mm;
struct inode = mapping->host;
struct inode *inode = mapping->host;
unsigned long addr;
int ret = 0;
......
......@@ -3,7 +3,7 @@
*
* Copyright (C) 1995 Linus Torvalds
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
* Copyright (C) 2002 Andi Kleen <ak@suse.de>
* Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
*/
#include <linux/config.h>
......@@ -37,8 +37,9 @@
#include <asm/tlb.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
unsigned long start_pfn, end_pfn;
#define Dprintk(x...) printk(x)
struct mmu_gather mmu_gathers[NR_CPUS];
......@@ -90,9 +91,11 @@ static void *spp_getpage(void)
if (after_bootmem)
ptr = (void *) get_zeroed_page(GFP_ATOMIC);
else
ptr = alloc_bootmem_low(PAGE_SIZE);
if (!ptr)
ptr = alloc_bootmem_pages(PAGE_SIZE);
if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
Dprintk("spp_getpage %p\n", ptr);
return ptr;
}
......@@ -104,6 +107,8 @@ static void set_pte_phys(unsigned long vaddr,
pmd_t *pmd;
pte_t *pte;
Dprintk("set_pte_phys %lx to %lx\n", vaddr, phys);
level4 = pml4_offset_k(vaddr);
if (pml4_none(*level4)) {
printk("PML4 FIXMAP MISSING, it should be setup in head.S!\n");
......@@ -114,7 +119,7 @@ static void set_pte_phys(unsigned long vaddr,
pmd = (pmd_t *) spp_getpage();
set_pgd(pgd, __pgd(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
if (pmd != pmd_offset(pgd, 0)) {
printk("PAGETABLE BUG #01!\n");
printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pgd,0));
return;
}
}
......@@ -128,6 +133,7 @@ static void set_pte_phys(unsigned long vaddr,
}
}
pte = pte_offset_kernel(pmd, vaddr);
/* CHECKME: */
if (pte_val(*pte))
pte_ERROR(*pte);
set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, prot));
......@@ -151,7 +157,8 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
set_pte_phys(address, phys, prot);
}
extern unsigned long start_pfn, end_pfn;
unsigned long __initdata table_start, table_end;
extern pmd_t temp_boot_pmds[];
static struct temp_map {
......@@ -168,21 +175,21 @@ static __init void *alloc_low_page(int *index, unsigned long *phys)
{
struct temp_map *ti;
int i;
unsigned long pfn = start_pfn++, paddr;
unsigned long pfn = table_end++, paddr;
void *adr;
if (pfn >= end_pfn_map)
if (pfn >= end_pfn)
panic("alloc_low_page: ran out of memory");
for (i = 0; temp_mappings[i].allocated; i++) {
if (!temp_mappings[i].pmd)
panic("alloc_low_page: ran out of temp mappings");
}
ti = &temp_mappings[i];
paddr = (pfn & (~511)) << PAGE_SHIFT;
paddr = (pfn << PAGE_SHIFT) & PMD_MASK;
set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE));
ti->allocated = 1;
__flush_tlb();
adr = ti->address + (pfn & 511)*PAGE_SIZE;
adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
*index = i;
*phys = pfn * PAGE_SIZE;
return adr;
......@@ -203,20 +210,26 @@ static void __init phys_pgd_init(pgd_t *pgd, unsigned long address, unsigned lon
pgd = pgd + i;
for (; i < PTRS_PER_PGD; pgd++, i++) {
int map;
unsigned long paddr = i*PGDIR_SIZE, pmd_phys;
unsigned long paddr, pmd_phys;
pmd_t *pmd;
paddr = (address & PML4_MASK) + i*PGDIR_SIZE;
if (paddr >= end) {
for (; i < PTRS_PER_PGD; i++, pgd++)
set_pgd(pgd, __pgd(0));
break;
}
if (!e820_mapped(paddr, paddr+PGDIR_SIZE, 0)) {
set_pgd(pgd, __pgd(0));
continue;
}
pmd = alloc_low_page(&map, &pmd_phys);
set_pgd(pgd, __pgd(pmd_phys | _KERNPG_TABLE));
for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
unsigned long pe;
paddr = i*PGDIR_SIZE + j*PMD_SIZE;
if (paddr >= end) {
for (; j < PTRS_PER_PMD; j++, pmd++)
set_pmd(pmd, __pmd(0));
......@@ -239,13 +252,37 @@ void __init init_memory_mapping(void)
unsigned long adr;
unsigned long end;
unsigned long next;
unsigned long pgds, pmds, tables;
Dprintk("init_memory_mapping\n");
end = end_pfn_map << PAGE_SHIFT;
/*
* Find space for the kernel direct mapping tables.
* Later we should allocate these tables in the local node of the memory
* mapped. Unfortunately this is done currently before the nodes are
* discovered.
*/
pgds = (end + PGDIR_SIZE - 1) >> PGDIR_SHIFT;
pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
tables = round_up(pgds*8, PAGE_SIZE) + round_up(pmds * 8, PAGE_SIZE);
table_start = find_e820_area(0x8000, __pa_symbol(&_text), tables);
if (table_start == -1UL)
panic("Cannot find space for the kernel page tables");
table_start >>= PAGE_SHIFT;
table_end = table_start;
end += __PAGE_OFFSET; /* turn virtual */
end = PAGE_OFFSET + (end_pfn_map * PAGE_SIZE);
for (adr = PAGE_OFFSET; adr < end; adr = next) {
int map;
unsigned long pgd_phys;
pgd_t *pgd = alloc_low_page(&map, &pgd_phys);
next = adr + (512UL * 1024 * 1024 * 1024);
next = adr + PML4_SIZE;
if (next > end)
next = end;
phys_pgd_init(pgd, adr-PAGE_OFFSET, next-PAGE_OFFSET);
......@@ -254,20 +291,35 @@ void __init init_memory_mapping(void)
}
asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
__flush_tlb_all();
early_printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end,
table_start<<PAGE_SHIFT,
table_end<<PAGE_SHIFT);
}
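For a sense of scale, here is a minimal userspace sketch of the table sizing computed above. The 4 GB end-of-memory figure is an assumption for illustration; on x86-64 PGDIR_SIZE is 512 GB, PMD_SIZE is 2 MB, and each table entry is 8 bytes:

#include <stdio.h>

int main(void)
{
	unsigned long end    = 4UL << 30;                       /* assume 4 GB mapped */
	unsigned long pgds   = (end + (512UL << 30) - 1) >> 39; /* PGDIR_SHIFT = 39 */
	unsigned long pmds   = (end + (2UL << 20) - 1) >> 21;   /* PMD_SHIFT = 21 */
	unsigned long pg     = 4096;                            /* PAGE_SIZE */
	unsigned long tables = ((pgds * 8 + pg - 1) & ~(pg - 1)) +
	                       ((pmds * 8 + pg - 1) & ~(pg - 1));

	/* prints: pgds=1 pmds=2048 tables=20480 -- one pgd page plus four pmd pages */
	printf("pgds=%lu pmds=%lu tables=%lu\n", pgds, pmds, tables);
	return 0;
}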
extern struct x8664_pda cpu_pda[NR_CPUS];
void __init zap_low_mappings (void)
static unsigned long low_pml4[NR_CPUS];
void swap_low_mappings(void)
{
int i;
for (i = 0; i < NR_CPUS; i++) {
if (cpu_pda[i].level4_pgt)
cpu_pda[i].level4_pgt[0] = 0;
unsigned long t;
if (!cpu_pda[i].level4_pgt)
continue;
t = cpu_pda[i].level4_pgt[0];
cpu_pda[i].level4_pgt[0] = low_pml4[i];
low_pml4[i] = t;
}
flush_tlb_all();
}
void zap_low_mappings(void)
{
swap_low_mappings();
}
#ifndef CONFIG_DISCONTIGMEM
void __init paging_init(void)
{
{
......@@ -286,7 +338,7 @@ void __init paging_init(void)
}
return;
}
#endif
static inline int page_is_ram (unsigned long pagenr)
{
......@@ -315,36 +367,45 @@ void __init mem_init(void)
int codesize, reservedpages, datasize, initsize;
int tmp;
if (!mem_map)
BUG();
/* How many end-of-memory variables you have, grandma! */
max_low_pfn = end_pfn;
max_pfn = end_pfn;
max_mapnr = num_physpages = end_pfn;
num_physpages = end_pfn;
high_memory = (void *) __va(end_pfn * PAGE_SIZE);
/* clear the zero-page */
memset(empty_zero_page, 0, PAGE_SIZE);
reservedpages = 0;
/* this will put all low memory onto the freelists */
totalram_pages += free_all_bootmem();
#ifdef CONFIG_DISCONTIGMEM
totalram_pages += numa_free_all_bootmem();
tmp = 0;
/* should count reserved pages here for all nodes */
#else
max_mapnr = end_pfn;
if (!mem_map) BUG();
after_bootmem = 1;
totalram_pages += free_all_bootmem();
reservedpages = 0;
for (tmp = 0; tmp < end_pfn; tmp++)
/*
* Only count reserved RAM pages
*/
if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
reservedpages++;
#endif
after_bootmem = 1;
codesize = (unsigned long) &_etext - (unsigned long) &_text;
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init)\n",
(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
max_mapnr << (PAGE_SHIFT-10),
end_pfn << (PAGE_SHIFT-10),
codesize >> 10,
reservedpages << (PAGE_SHIFT-10),
datasize >> 10,
......@@ -392,3 +453,16 @@ void free_initrd_mem(unsigned long start, unsigned long end)
}
}
#endif
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
{
/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_DISCONTIGMEM
int nid = phys_to_nid(phys);
if (phys < HIGH_MEMORY && nid)
panic("reserve of %lx at node %d", phys, nid);
reserve_bootmem_node(NODE_DATA(nid), phys, len);
#else
reserve_bootmem(phys, len);
#endif
}
......@@ -133,14 +133,16 @@ void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flag
*/
if (phys_addr < virt_to_phys(high_memory)) {
char *t_addr, *t_end;
struct page *page;
t_addr = __va(phys_addr);
t_end = t_addr + (size - 1);
#ifndef CONFIG_DISCONTIGMEM
struct page *page;
for(page = virt_to_page(t_addr); page <= virt_to_page(t_end); page++)
if(!PageReserved(page))
return NULL;
#endif
}
/*
......
/*
* AMD K8 NUMA support.
* Discover the memory map and associated nodes.
*
* Doesn't use the ACPI SRAT table because it has a questionable license.
* Instead the northbridge registers are read directly.
* XXX in 2.5 we could use the generic SRAT code
*
* Copyright 2002,2003 Andi Kleen, SuSE Labs.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/module.h>
#include <asm/io.h>
#include <linux/pci_ids.h>
#include <asm/types.h>
#include <asm/mmzone.h>
#include <asm/proto.h>
#include <asm/e820.h>
#include <asm/pci-direct.h>
#include <asm/numa.h>
static int find_northbridge(void)
{
int num;
for (num = 0; num < 32; num++) {
u32 header;
header = read_pci_config(0, num, 0, 0x00);
if (header != (PCI_VENDOR_ID_AMD | (0x1100<<16)))
continue;
header = read_pci_config(0, num, 1, 0x00);
if (header != (PCI_VENDOR_ID_AMD | (0x1101<<16)))
continue;
return num;
}
return -1;
}
int __init k8_scan_nodes(unsigned long start, unsigned long end)
{
unsigned long prevbase;
struct node nodes[MAXNODE];
int nodeid, numnodes, maxnode, i, nb;
nb = find_northbridge();
if (nb < 0)
return nb;
printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb);
numnodes = (read_pci_config(0, nb, 0, 0x60 ) >> 4) & 3;
memset(&nodes,0,sizeof(nodes));
prevbase = 0;
maxnode = -1;
for (i = 0; i < MAXNODE; i++) {
unsigned long base,limit;
base = read_pci_config(0, nb, 1, 0x40 + i*8);
limit = read_pci_config(0, nb, 1, 0x44 + i*8);
nodeid = limit & 3;
if (!limit) {
printk(KERN_INFO "Skipping node entry %d (base %lx)\n", i, base);
continue;
}
if ((base >> 8) & 3 || (limit >> 8) & 3) {
printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n",
nodeid, (base>>8)&3, (limit>>8) & 3);
return -1;
}
if (nodeid > maxnode)
maxnode = nodeid;
if ((1UL << nodeid) & nodes_present) {
printk("Node %d already present. Skipping\n", nodeid);
continue;
}
limit >>= 16;
limit <<= 24;
if (limit > end_pfn_map << PAGE_SHIFT)
limit = end_pfn_map << PAGE_SHIFT;
if (limit <= base) {
printk(KERN_INFO "Node %d beyond memory map\n", nodeid);
continue;
}
base >>= 16;
base <<= 24;
if (base < start)
base = start;
if (limit > end)
limit = end;
if (limit == base)
continue;
if (limit < base) {
printk(KERN_INFO"Node %d bogus settings %lx-%lx. Ignored.\n",
nodeid, base, limit);
continue;
}
/* Could sort here, but punt for now. Should not happen anyway. */
if (prevbase > base) {
printk(KERN_INFO "Node map not sorted %lx,%lx\n",
prevbase,base);
return -1;
}
printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n",
nodeid, base, limit);
nodes[nodeid].start = base;
nodes[nodeid].end = limit;
prevbase = base;
}
if (maxnode <= 0)
return -1;
memnode_shift = compute_hash_shift(nodes,maxnode,end);
if (memnode_shift < 0) {
printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
return -1;
}
printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift);
early_for_all_nodes(i) {
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
return 0;
}
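A hedged sketch of the base/limit decoding in the loop above, assuming the K8 DRAM Base/Limit registers keep physical address bits 39..24 in register bits 31..16 (which is what the shift-right-16, shift-left-24 sequence implements). The register values below are invented for illustration:

#include <stdio.h>

int main(void)
{
	unsigned int base_reg  = 0x00000003;  /* invented: base 0, R/W enable bits set */
	unsigned int limit_reg = 0x00400001;  /* invented: limit 1 GB, destination node 1 */
	unsigned long base  = ((unsigned long)(base_reg  >> 16)) << 24;
	unsigned long limit = ((unsigned long)(limit_reg >> 16)) << 24;
	int nodeid = limit_reg & 3;

	/* prints: node 1: 0000000000000000-0000000040000000 */
	printf("node %d: %016lx-%016lx\n", nodeid, base, limit);
	return 0;
}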
/*
* Generic VM initialization for x86-64 NUMA setups.
* Copyright 2002,2003 Andi Kleen, SuSE Labs.
*/
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mmzone.h>
#include <linux/blk.h>
#include <linux/ctype.h>
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/dma.h>
#include <asm/numa.h>
#define Dprintk(x...) printk(x)
struct pglist_data *node_data[MAXNODE];
bootmem_data_t plat_node_bdata[MAX_NUMNODES];
int memnode_shift;
u8 memnodemap[NODEMAPSIZE];
static int numa_off __initdata;
unsigned long nodes_present;
int maxnode;
static int emunodes __initdata;
int compute_hash_shift(struct node *nodes, int numnodes, u64 maxmem)
{
int i;
int shift = 24;
u64 addr;
/* When in doubt use brute force. */
while (shift < 48) {
memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE);
early_for_all_nodes (i) {
for (addr = nodes[i].start;
addr < nodes[i].end;
addr += (1UL << shift)) {
if (memnodemap[addr >> shift] != 0xff) {
printk("node %d shift %d addr %Lx conflict %d\n",
i, shift, addr, memnodemap[addr>>shift]);
goto next;
}
memnodemap[addr >> shift] = i;
}
}
return shift;
next:
shift++;
}
memset(memnodemap,0,sizeof(*memnodemap) * NODEMAPSIZE);
return -1;
}
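A minimal userspace sketch of the search above: find the smallest shift such that every (addr >> shift) bucket lands entirely inside one node, so memnodemap becomes a collision-free lookup table. The two node ranges are invented for illustration:

#include <stdio.h>
#include <string.h>

struct node { unsigned long long start, end; };

int main(void)
{
	struct node nodes[2] = {
		{ 0x00000000ULL, 0x40000000ULL },  /* invented node 0: 0-1 GB */
		{ 0x40000000ULL, 0x80000000ULL },  /* invented node 1: 1-2 GB */
	};
	unsigned char map[256];
	int shift;

	for (shift = 24; shift < 48; shift++) {
		int i, ok = 1;
		unsigned long long addr;

		memset(map, 0xff, sizeof(map));
		for (i = 0; i < 2 && ok; i++) {
			for (addr = nodes[i].start; addr < nodes[i].end;
			     addr += 1ULL << shift) {
				if (map[addr >> shift] != 0xff &&
				    map[addr >> shift] != i) {
					ok = 0;  /* bucket straddles two nodes */
					break;
				}
				map[addr >> shift] = i;
			}
		}
		if (ok) {
			printf("shift=%d\n", shift);  /* prints shift=24 here */
			return 0;
		}
	}
	return 1;
}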
/* Initialize bootmem allocator for a node */
void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
{
unsigned long start_pfn, end_pfn, bootmap_pages, bootmap_size, bootmap_start;
unsigned long nodedata_phys;
const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
start = round_up(start, ZONE_ALIGN);
printk("Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end);
start_pfn = start >> PAGE_SHIFT;
end_pfn = end >> PAGE_SHIFT;
nodedata_phys = find_e820_area(start, end, pgdat_size);
if (nodedata_phys == -1L)
panic("Cannot find memory pgdat in node %d\n", nodeid);
Dprintk("nodedata_phys %lx\n", nodedata_phys);
node_data[nodeid] = phys_to_virt(nodedata_phys);
memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid];
NODE_DATA(nodeid)->node_start_pfn = start_pfn;
NODE_DATA(nodeid)->node_size = end_pfn - start_pfn;
/* Find a place for the bootmem map */
bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
bootmap_start = find_e820_area(bootmap_start, end, bootmap_pages<<PAGE_SHIFT);
if (bootmap_start == -1L)
panic("Not enough contiguous space for bootmap on node %d", nodeid);
Dprintk("bootmap start %lu pages %lu\n", bootmap_start, bootmap_pages);
bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
bootmap_start >> PAGE_SHIFT,
start_pfn, end_pfn);
e820_bootmem_free(NODE_DATA(nodeid), start, end);
reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
if (nodeid > maxnode)
maxnode = nodeid;
nodes_present |= (1UL << nodeid);
}
/* Initialize final allocator for a zone */
void __init setup_node_zones(int nodeid)
{
unsigned long start_pfn, end_pfn;
unsigned long zones[MAX_NR_ZONES];
unsigned long dma_end_pfn;
memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES);
start_pfn = node_start_pfn(nodeid);
end_pfn = node_end_pfn(nodeid);
printk("setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn);
/* All nodes > 0 have a zero length zone DMA */
dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT;
if (start_pfn < dma_end_pfn) {
zones[ZONE_DMA] = dma_end_pfn - start_pfn;
zones[ZONE_NORMAL] = end_pfn - dma_end_pfn;
} else {
zones[ZONE_NORMAL] = end_pfn - start_pfn;
}
free_area_init_node(nodeid, NODE_DATA(nodeid), NULL, zones,
start_pfn, NULL);
}
int fake_node;
int __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
{
#ifdef CONFIG_K8_NUMA
if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT))
return 0;
#endif
printk(KERN_INFO "%s\n",
numa_off ? "NUMA turned off" : "No NUMA configuration found");
if (!numa_off && emunodes > 0) {
struct node nodes[MAXNODE];
unsigned long nodesize = (end_pfn << PAGE_SHIFT) / emunodes;
int i;
if (emunodes > MAXNODE)
emunodes = MAXNODE;
printk(KERN_INFO "Faking %d nodes of size %ld MB\n", emunodes, nodesize>>20);
for (i = 0; i < emunodes; i++) {
unsigned long end = (i+1)*nodesize;
if (i == emunodes-1)
end = end_pfn << PAGE_SHIFT;
nodes[i].start = i * nodesize;
nodes[i].end = end;
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
memnode_shift = compute_hash_shift(nodes, emunodes, nodes[i-1].end);
return 0;
}
printk(KERN_INFO "Faking a node at %016lx-%016lx\n",
start_pfn << PAGE_SHIFT,
end_pfn << PAGE_SHIFT);
/* setup dummy node covering all memory */
fake_node = 1;
memnode_shift = 63;
memnodemap[0] = 0;
setup_node_bootmem(0, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
return -1;
}
unsigned long __init numa_free_all_bootmem(void)
{
int i;
unsigned long pages = 0;
for_all_nodes(i) {
pages += free_all_bootmem_node(NODE_DATA(i));
}
return pages;
}
void __init paging_init(void)
{
int i;
for_all_nodes(i) {
setup_node_zones(i);
}
}
/* [numa=off] */
/* [numa=emunodes] */
__init int numa_setup(char *opt)
{
if (!strncmp(opt,"off",3))
numa_off = 1;
if (isdigit(opt[0]))
emunodes = simple_strtoul(opt, NULL, 10);
return 1;
}
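(Editor's note: booting with "numa=off" thus skips node discovery entirely, while a bare number, say "numa=4", a value invented for illustration, takes the fake-node path in numa_initmem_init() above and carves memory into four equal emulated nodes.)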
......@@ -47,10 +47,10 @@ SECTIONS
.vsyscall_0 -10*1024*1024: AT ((LOADADDR(.data.cacheline_aligned) + SIZEOF(.data.cacheline_aligned) + 4095) & ~(4095)) { *(.vsyscall_0) }
__vsyscall_0 = LOADADDR(.vsyscall_0);
. = ALIGN(64);
.vxtime_sequence : AT ((LOADADDR(.vsyscall_0) + SIZEOF(.vsyscall_0) + 63) & ~(63)) { *(.vxtime_sequence) }
vxtime_sequence = LOADADDR(.vxtime_sequence);
.xtime_lock : AT ((LOADADDR(.vsyscall_0) + SIZEOF(.vsyscall_0) + 63) & ~(63)) { *(.xtime_lock) }
xtime_lock = LOADADDR(.xtime_lock);
. = ALIGN(16);
.hpet : AT ((LOADADDR(.vxtime_sequence) + SIZEOF(.vxtime_sequence) + 15) & ~(15)) { *(.hpet) }
.hpet : AT ((LOADADDR(.xtime_lock) + SIZEOF(.xtime_lock) + 15) & ~(15)) { *(.hpet) }
hpet = LOADADDR(.hpet);
. = ALIGN(16);
.wall_jiffies : AT ((LOADADDR(.hpet) + SIZEOF(.hpet) + 15) & ~(15)) { *(.wall_jiffies) }
......@@ -59,7 +59,10 @@ SECTIONS
.sys_tz : AT ((LOADADDR(.wall_jiffies) + SIZEOF(.wall_jiffies) + 15) & ~(15)) { *(.sys_tz) }
sys_tz = LOADADDR(.sys_tz);
. = ALIGN(16);
.jiffies : AT ((LOADADDR(.sys_tz) + SIZEOF(.sys_tz) + 15) & ~(15)) { *(.jiffies) }
.sysctl_vsyscall : AT ((LOADADDR(.sys_tz) + SIZEOF(.sys_tz) + 15) & ~(15)) { *(.sysctl_vsyscall) }
sysctl_vsyscall = LOADADDR(.sysctl_vsyscall);
. = ALIGN(16);
.jiffies : AT ((LOADADDR(.sysctl_vsyscall) + SIZEOF(.sysctl_vsyscall) + 15) & ~(15)) { *(.jiffies) }
jiffies = LOADADDR(.jiffies);
. = ALIGN(16);
.xtime : AT ((LOADADDR(.jiffies) + SIZEOF(.jiffies) + 15) & ~(15)) { *(.xtime) }
......
......@@ -48,8 +48,8 @@
#define ACPI_ASM_MACROS
#define BREAKPOINT3
#define ACPI_DISABLE_IRQS() __cli()
#define ACPI_ENABLE_IRQS() __sti()
#define ACPI_DISABLE_IRQS() local_irq_disable()
#define ACPI_ENABLE_IRQS() local_irq_enable()
#define ACPI_FLUSH_CPU_CACHE() wbinvd()
/*
......@@ -142,6 +142,10 @@ extern void acpi_reserve_bootmem(void);
extern int acpi_disabled;
#define dmi_broken (0)
#define BROKEN_ACPI_Sx 0x0001
#define BROKEN_INIT_AFTER_S1 0x0002
#endif /*__KERNEL__*/
#endif /*_ASM_ACPI_H*/
......@@ -12,15 +12,6 @@ static inline struct task_struct *get_current(void)
return t;
}
#define stack_current() \
({ \
struct thread_info *ti; \
__asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (~8191UL)); \
ti->task; \
})
#define current get_current()
#else
......
......@@ -135,6 +135,14 @@ static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
DESC_LDT, size);
}
static inline void set_seg_base(unsigned cpu, int entry, void *base)
{
struct desc_struct *d = &cpu_gdt_table[cpu][entry];
d->base0 = PTR_LOW(base);
d->base1 = PTR_MIDDLE(base);
d->base2 = PTR_HIGH(base);
}
#define LDT_entry_a(info) \
((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
#define LDT_entry_b(info) \
......
#ifndef _ASM_X8664_DMA_MAPPING_H
#define _ASM_X8664_DMA_MAPPING_H
#ifndef _X8664_DMA_MAPPING_H
#define _X8664_DMA_MAPPING_H 1
#include <asm-generic/dma-mapping.h>
......
......@@ -47,7 +47,7 @@ extern void add_memory_region(unsigned long start, unsigned long size,
int type);
extern void setup_memory_region(void);
extern void contig_e820_setup(void);
extern void e820_end_of_ram(void);
extern unsigned long e820_end_of_ram(void);
extern void e820_reserve_resources(void);
extern void e820_print_map(char *who);
extern int e820_mapped(unsigned long start, unsigned long end, int type);
......
......@@ -39,16 +39,10 @@ static inline int need_signal_i387(struct task_struct *me)
#define kernel_fpu_end() stts()
#define unlazy_fpu(tsk) do { \
if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) \
if ((tsk)->thread_info->flags & TIF_USEDFPU) \
save_init_fpu(tsk); \
} while (0)
#define unlazy_current_fpu() do { \
if (test_thread_flag(TIF_USEDFPU)) \
save_init_fpu(tsk); \
} while (0)
#define clear_fpu(tsk) do { \
if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) { \
asm volatile("fwait"); \
......@@ -134,7 +128,7 @@ static inline void save_init_fpu( struct task_struct *tsk )
{
asm volatile( "fxsave %0 ; fnclex"
: "=m" (tsk->thread.i387.fxsave));
clear_tsk_thread_flag(tsk, TIF_USEDFPU);
tsk->thread_info->flags &= ~TIF_USEDFPU;
stts();
}
......
#ifndef _ASM_IO_H
#define _ASM_IO_H
#include <linux/config.h>
/*
* This file contains the definitions for the x86 IO instructions
* inb/inw/inl/outb/outw/outl and the "string versions" of the same
......@@ -135,7 +137,12 @@ extern inline void * phys_to_virt(unsigned long address)
/*
* Change "struct page" to physical address.
*/
#ifdef CONFIG_DISCONTIGMEM
#include <asm/mmzone.h>
#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
#else
#define page_to_phys(page) ((page - mem_map) << PAGE_SHIFT)
#endif
extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
......
#ifndef _ASM_MMSEGMENT_H
#define _ASM_MMSEGMENT_H 1
typedef struct {
unsigned long seg;
} mm_segment_t;
#endif
/* K8 NUMA support */
/* Copyright 2002,2003 by Andi Kleen, SuSE Labs */
/* 2.5 version loosely based on the NUMAQ code by Pat Gaughen. */
#ifndef _ASM_X86_64_MMZONE_H
#define _ASM_X86_64_MMZONE_H 1
#include <linux/config.h>
#ifdef CONFIG_DISCONTIGMEM
#define VIRTUAL_BUG_ON(x)
#include <asm/numnodes.h>
#include <asm/smp.h>
#define MAXNODE 8
#define NODEMAPSIZE 0xff
/* Simple perfect hash to map physical addresses to node numbers */
extern int memnode_shift;
extern u8 memnodemap[NODEMAPSIZE];
extern int maxnode;
extern struct pglist_data *node_data[];
/* kern_addr_valid below hardcodes the same algorithm */
static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
{
int nid;
VIRTUAL_BUG_ON((addr >> memnode_shift) >= NODEMAPSIZE);
nid = memnodemap[addr >> memnode_shift];
VIRTUAL_BUG_ON(nid > maxnode);
return nid;
}
#define kvaddr_to_nid(kaddr) phys_to_nid(__pa(kaddr))
#define NODE_DATA(nid) (node_data[nid])
#define node_mem_map(nid) (NODE_DATA(nid)->node_mem_map)
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
#define node_end_pfn(nid) (NODE_DATA(nid)->node_start_pfn + \
NODE_DATA(nid)->node_size)
#define node_size(nid) (NODE_DATA(nid)->node_size)
#define local_mapnr(kvaddr) \
( (__pa(kvaddr) >> PAGE_SHIFT) - node_start_pfn(kvaddr_to_nid(kvaddr)) )
#define kern_addr_valid(kvaddr) ({ \
int ok = 0; \
unsigned long index = __pa(kvaddr) >> memnode_shift; \
if (index <= NODEMAPSIZE) { \
unsigned nodeid = memnodemap[index]; \
unsigned long pfn = __pa(kvaddr) >> PAGE_SHIFT; \
unsigned long start_pfn = node_start_pfn(nodeid); \
ok = (nodeid != 0xff) && \
(pfn >= start_pfn) && \
(pfn < start_pfn + node_size(nodeid)); \
} \
ok; \
})
/* AK: this currently doesn't deal with invalid addresses. We'll see
   whether the 2.5 kernel manages to avoid passing them in
   (2.4 used to). */
#define pfn_to_page(pfn) ({ \
int nid = phys_to_nid(((unsigned long)(pfn)) << PAGE_SHIFT); \
((pfn) - node_start_pfn(nid)) + node_mem_map(nid); \
})
#define page_to_pfn(page) \
(long)(((page) - page_zone(page)->zone_mem_map) + page_zone(page)->zone_start_pfn)
/* AK: !DISCONTIGMEM just forces it to 1. Can't we too? */
#define pfn_valid(pfn) ((pfn) < num_physpages)
#endif
#endif
......@@ -185,7 +185,6 @@ extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];
extern int mp_current_pci_id;
extern unsigned long mp_lapic_addr;
extern int pic_mode;
extern int using_apic_timer;
#ifdef CONFIG_ACPI_BOOT
extern void mp_register_lapic (u8 id, u8 enabled);
......@@ -199,5 +198,7 @@ extern void mp_parse_prt (void);
#endif /*CONFIG_X86_IO_APIC*/
#endif
extern int using_apic_timer;
#endif
......@@ -67,6 +67,61 @@
: "=a" (low), "=d" (high) \
: "c" (counter))
extern inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
{
__asm__("cpuid"
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (op));
}
/*
* CPUID functions returning a single datum
*/
extern inline unsigned int cpuid_eax(unsigned int op)
{
unsigned int eax;
__asm__("cpuid"
: "=a" (eax)
: "0" (op)
: "bx", "cx", "dx");
return eax;
}
extern inline unsigned int cpuid_ebx(unsigned int op)
{
unsigned int eax, ebx;
__asm__("cpuid"
: "=a" (eax), "=b" (ebx)
: "0" (op)
: "cx", "dx" );
return ebx;
}
extern inline unsigned int cpuid_ecx(unsigned int op)
{
unsigned int eax, ecx;
__asm__("cpuid"
: "=a" (eax), "=c" (ecx)
: "0" (op)
: "bx", "dx" );
return ecx;
}
extern inline unsigned int cpuid_edx(unsigned int op)
{
unsigned int eax, edx;
__asm__("cpuid"
: "=a" (eax), "=d" (edx)
: "0" (op)
: "bx", "cx");
return edx;
}
#endif
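These helpers are verbatim moves from processor.h (see the removal hunk further down). A hedged usage sketch, compiling as plain userspace C with GNU inline asm; nothing here is kernel API:

#include <stdio.h>

static unsigned int cpuid_eax(unsigned int op)
{
	unsigned int eax;

	__asm__("cpuid"
		: "=a" (eax)
		: "0" (op)
		: "ebx", "ecx", "edx");
	return eax;
}

int main(void)
{
	unsigned int sig = cpuid_eax(1);  /* leaf 1: processor signature in EAX */

	printf("family %u model %u stepping %u\n",
	       (sig >> 8) & 0xf, (sig >> 4) & 0xf, sig & 0xf);
	return 0;
}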
/* AMD/K8 specific MSRs */
......
#ifndef _ASM_X8664_NUMA_H
#define _ASM_X8664_NUMA_H 1
#define MAXNODE 8
#define NODEMASK 0xff
struct node {
u64 start,end;
};
#define for_all_nodes(x) for ((x) = 0; (x) <= maxnode; (x)++) \
if ((1UL << (x)) & nodes_present)
#define early_for_all_nodes(n) \
for (n=0; n<MAXNODE;n++) if (nodes[n].start!=nodes[n].end)
extern int compute_hash_shift(struct node *nodes, int numnodes, u64 maxmem);
extern unsigned long nodes_present;
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
#endif
#ifndef _ASM_X8664_NUMNODES_H
#define _ASM_X8664_NUMNODES_H 1
#include <linux/config.h>
#ifdef CONFIG_DISCONTIGMEM
#define MAX_NUMNODES 8 /* APIC limit currently */
#else
#define MAX_NUMNODES 1
#endif
#endif
#ifndef _X86_64_PAGE_H
#define _X86_64_PAGE_H
#include <linux/config.h>
/* PAGE_SHIFT determines the page size */
#define PAGE_SHIFT 12
#ifdef __ASSEMBLY__
......@@ -10,7 +12,13 @@
#endif
#define PAGE_MASK (~(PAGE_SIZE-1))
#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & (__PHYSICAL_MASK << PAGE_SHIFT))
#define THREAD_SIZE (2*PAGE_SIZE)
#define THREAD_ORDER 1
#ifdef __ASSEMBLY__
#define THREAD_SIZE (1 << (PAGE_SHIFT + THREAD_ORDER))
#else
#define THREAD_SIZE (1UL << (PAGE_SHIFT + THREAD_ORDER))
#endif
#define CURRENT_MASK (~(THREAD_SIZE-1))
#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
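(Editor's note: with PAGE_SHIFT at 12 and THREAD_ORDER at 1, THREAD_SIZE works out to 8192 bytes and CURRENT_MASK to ~8191UL, which is precisely the constant that the andq %%rsp sequences elsewhere in this patch stop hardcoding.)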
......@@ -58,7 +66,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
/* See Documentation/x86_64/mm.txt for a description of the layout. */
/* See Documentation/x86_64/mm.txt for a description of the memory map. */
#define __START_KERNEL 0xffffffff80100000
#define __START_KERNEL_map 0xffffffff80000000
#define __PAGE_OFFSET 0x0000010000000000
......@@ -100,10 +108,13 @@ extern __inline__ int get_order(unsigned long size)
__pa(v); })
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
#ifndef CONFIG_DISCONTIGMEM
#define pfn_to_page(pfn) (mem_map + (pfn))
#define page_to_pfn(page) ((unsigned long)((page) - mem_map))
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define pfn_valid(pfn) ((pfn) < max_mapnr)
#endif
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
......
......@@ -55,7 +55,10 @@ asm volatile(op "q %0,%%gs:%c1"::"r" (val),"i"(pda_offset(field)):"memory"); bre
} \
} while (0)
/*
* AK: PDA read accesses should neither be volatile nor have a memory clobber.
* Unfortunately, removing them causes all hell to break loose currently.
*/
#define pda_from_op(op,field) ({ \
typedef typeof_field(struct x8664_pda, field) T__; T__ ret__; \
switch (sizeof_field(struct x8664_pda, field)) { \
......
......@@ -14,8 +14,7 @@
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
{
set_pmd(pmd, __pmd(_PAGE_TABLE |
((u64)(pte - mem_map) << PAGE_SHIFT)));
set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
}
extern __inline__ pmd_t *get_pmd(void)
......@@ -76,6 +75,6 @@ extern inline void pte_free(struct page *pte)
}
#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte))
#define __pmd_free_tlb(tlb,x) do { } while (0)
#define __pmd_free_tlb(tlb,x) pmd_free(x)
#endif /* _X86_64_PGALLOC_H */
......@@ -103,6 +103,8 @@ static inline void set_pml4(pml4_t *dst, pml4_t val)
#define ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte, 0))
#define pte_same(a, b) ((a).pte == (b).pte)
#define PML4_SIZE (1UL << PML4_SHIFT)
#define PML4_MASK (~(PML4_SIZE-1))
#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
......@@ -317,7 +319,8 @@ static inline pgd_t *current_pgd_offset_k(unsigned long address)
/* PMD - Level 2 access */
#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
#define pmd_page(pmd) (mem_map + ((pmd_val(pmd) & PTE_MASK)>>PAGE_SHIFT))
#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
#define __pmd_offset(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
#define pmd_offset(dir, address) ((pmd_t *) pgd_page(*(dir)) + \
__pmd_offset(address))
......@@ -372,7 +375,9 @@ typedef pte_t *pte_addr_t;
#endif /* !__ASSEMBLY__ */
#ifndef CONFIG_DISCONTIGMEM
#define kern_addr_valid(addr) (1)
#endif
#define io_remap_page_range remap_page_range
......
......@@ -17,6 +17,7 @@
#include <asm/msr.h>
#include <asm/current.h>
#include <asm/system.h>
#include <asm/mmsegment.h>
#define TF_MASK 0x00000100
#define IF_MASK 0x00000200
......@@ -109,64 +110,6 @@ extern void dodgy_tsc(void);
#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
/*
* Generic CPUID function
* FIXME: This really belongs to msr.h
*/
extern inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
{
__asm__("cpuid"
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (op));
}
/*
* CPUID functions returning a single datum
*/
extern inline unsigned int cpuid_eax(unsigned int op)
{
unsigned int eax;
__asm__("cpuid"
: "=a" (eax)
: "0" (op)
: "bx", "cx", "dx");
return eax;
}
extern inline unsigned int cpuid_ebx(unsigned int op)
{
unsigned int eax, ebx;
__asm__("cpuid"
: "=a" (eax), "=b" (ebx)
: "0" (op)
: "cx", "dx" );
return ebx;
}
extern inline unsigned int cpuid_ecx(unsigned int op)
{
unsigned int eax, ecx;
__asm__("cpuid"
: "=a" (eax), "=c" (ecx)
: "0" (op)
: "bx", "dx" );
return ecx;
}
extern inline unsigned int cpuid_edx(unsigned int op)
{
unsigned int eax, edx;
__asm__("cpuid"
: "=a" (eax), "=d" (edx)
: "0" (op)
: "bx", "cx");
return edx;
}
/*
* Intel CPU features in CR4
*/
......@@ -210,36 +153,6 @@ static inline void clear_in_cr4 (unsigned long mask)
:"ax");
}
#if 0
/*
* Cyrix CPU configuration register indexes
*/
#define CX86_CCR0 0xc0
#define CX86_CCR1 0xc1
#define CX86_CCR2 0xc2
#define CX86_CCR3 0xc3
#define CX86_CCR4 0xe8
#define CX86_CCR5 0xe9
#define CX86_CCR6 0xea
#define CX86_CCR7 0xeb
#define CX86_DIR0 0xfe
#define CX86_DIR1 0xff
#define CX86_ARR_BASE 0xc4
#define CX86_RCR_BASE 0xdc
/*
* Cyrix CPU indexed register access macros
*/
#define getCx86(reg) ({ outb((reg), 0x22); inb(0x23); })
#define setCx86(reg, data) do { \
outb((reg), 0x22); \
outb((data), 0x23); \
} while (0)
#endif
/*
* Bus types
*/
......@@ -286,10 +199,6 @@ union i387_union {
struct i387_fxsave_struct fxsave;
};
typedef struct {
unsigned long seg;
} mm_segment_t;
struct tss_struct {
u32 reserved1;
u64 rsp0;
......@@ -302,7 +211,7 @@ struct tss_struct {
u16 reserved5;
u16 io_map_base;
u32 io_bitmap[IO_BITMAP_SIZE];
} __attribute__((packed));
} __attribute__((packed)) ____cacheline_aligned;
struct thread_struct {
unsigned long rsp0;
......@@ -336,6 +245,7 @@ struct thread_struct {
#define NMI_STACK 3
#define N_EXCEPTION_STACKS 3 /* hw limit: 7 */
#define EXCEPTION_STKSZ 1024
#define EXCEPTION_STK_ORDER 0
#define start_thread(regs,new_rip,new_rsp) do { \
asm volatile("movl %0,%%fs; movl %0,%%es; movl %0,%%ds": :"r" (0)); \
......@@ -378,6 +288,13 @@ extern inline void rep_nop(void)
__asm__ __volatile__("rep;nop": : :"memory");
}
/* Stop speculative execution */
extern inline void sync_core(void)
{
int tmp;
asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory");
}
#define cpu_has_fpu 1
#define ARCH_HAS_PREFETCH
......@@ -389,7 +306,6 @@ extern inline void rep_nop(void)
#define spin_lock_prefetch(x) prefetchw(x)
#define cpu_relax() rep_nop()
/*
* NSC/Cyrix CPU configuration register indexes
*/
......@@ -417,4 +333,11 @@ extern inline void rep_nop(void)
outb((data), 0x23); \
} while (0)
#define stack_current() \
({ \
struct thread_info *ti; \
asm("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \
ti->task; \
})
#endif /* __ASM_X86_64_PROCESSOR_H */
......@@ -25,6 +25,8 @@ extern void iommu_hole_init(void);
extern void do_softirq_thunk(void);
extern int numa_setup(char *opt);
extern int setup_early_printk(char *);
extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2)));
......@@ -36,18 +38,27 @@ extern unsigned long numa_free_all_bootmem(void);
extern void reserve_bootmem_generic(unsigned long phys, unsigned len);
extern void free_bootmem_generic(unsigned long phys, unsigned len);
extern unsigned long start_pfn, end_pfn, end_pfn_map;
extern unsigned long end_pfn_map;
extern void show_stack(unsigned long * rsp);
extern void exception_table_check(void);
extern int acpi_boot_init(char *);
extern void acpi_reserve_bootmem(void);
extern void swap_low_mappings(void);
extern int map_syscall32(struct mm_struct *mm, unsigned long address);
extern char *syscall32_page;
void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end);
extern unsigned long max_mapnr;
extern unsigned long end_pfn;
extern unsigned long table_start, table_end;
struct thread_struct;
struct user_desc;
int do_set_thread_area(struct thread_struct *t, struct user_desc *u_info);
int do_get_thread_area(struct thread_struct *t, struct user_desc *u_info);
......
......@@ -19,13 +19,15 @@
#define __USER_DS 0x2b /* 5*8+3 */
#define __USER_CS 0x33 /* 6*8+3 */
#define __USER32_DS __USER_DS
#define __KERNEL16_CS (GDT_ENTRY_KERNELCS16 * 8)
#define GDT_ENTRY_TLS 1
#define GDT_ENTRY_TSS 8 /* needs two entries */
#define GDT_ENTRY_LDT 10
#define GDT_ENTRY_TLS_MIN 11
#define GDT_ENTRY_TLS_MAX 13
#define GDT_ENTRY_LONGBASE 14
/* 14 free */
#define GDT_ENTRY_KERNELCS16 15
#define GDT_ENTRY_TLS_ENTRIES 3
......
......@@ -44,7 +44,9 @@ extern void smp_send_reschedule(int cpu);
extern void smp_send_reschedule_all(void);
extern void smp_invalidate_rcv(void); /* Process an NMI */
extern void (*mtrr_hook) (void);
extern void zap_low_mappings (void);
extern void zap_low_mappings(void);
#define SMP_TRAMPOLINE_BASE 0x6000
/*
* On x86 all CPUs are mapped 1:1 to the APIC space.
......@@ -55,38 +57,26 @@ extern void zap_low_mappings (void);
extern volatile unsigned long cpu_callout_map;
#define cpu_possible(cpu) (cpu_callout_map & (1<<(cpu)))
#define cpu_online(cpu) (cpu_online_map & (1<<(cpu)))
extern inline int cpu_logical_map(int cpu)
{
return cpu;
}
extern inline int cpu_number_map(int cpu)
{
return cpu;
}
#define for_each_cpu(cpu, mask) \
for(mask = cpu_online_map; \
cpu = __ffs(mask), mask != 0; \
mask &= ~(1UL<<cpu))
extern inline unsigned int num_online_cpus(void)
extern inline int any_online_cpu(unsigned int mask)
{
return hweight32(cpu_online_map);
}
if (mask & cpu_online_map)
return __ffs(mask & cpu_online_map);
extern inline int find_next_cpu(unsigned cpu)
{
unsigned long left = cpu_online_map >> (cpu+1);
if (!left)
return -1;
return ffz(~left) + cpu;
}
extern inline int find_first_cpu(void)
extern inline unsigned int num_online_cpus(void)
{
return ffz(~cpu_online_map);
return hweight32(cpu_online_map);
}
/* RED-PEN different from i386 */
#define for_each_cpu(i) \
for((i) = find_first_cpu(); (i)>=0; (i)=find_next_cpu(i))
static inline int num_booting_cpus(void)
{
return hweight32(cpu_callout_map);
......@@ -94,28 +84,25 @@ static inline int num_booting_cpus(void)
extern volatile unsigned long cpu_callout_map;
/*
* Some lowlevel functions might want to know about
* the real APIC ID <-> CPU # mapping.
*/
extern volatile int x86_apicid_to_cpu[NR_CPUS];
extern volatile int x86_cpu_to_apicid[NR_CPUS];
/*
* This function is needed by all SMP systems. It must _always_ be valid
* from the initial startup. We map APIC_BASE very early in page_setup(),
* so this is correct in the x86 case.
*/
#define smp_processor_id() read_pda(cpunumber)
extern __inline int hard_smp_processor_id(void)
{
/* we don't want to mark this access volatile - bad code generation */
return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
}
extern int disable_apic;
extern int slow_smp_processor_id(void);
extern inline int safe_smp_processor_id(void)
{
if (disable_apic)
return slow_smp_processor_id();
else
return hard_smp_processor_id();
}
#define cpu_online(cpu) (cpu_online_map & (1<<(cpu)))
#endif /* !ASSEMBLY */
......@@ -128,6 +115,7 @@ extern __inline int hard_smp_processor_id(void)
#ifndef CONFIG_SMP
#define stack_smp_processor_id() 0
#define safe_smp_processor_id() 0
#define for_each_cpu(x) (x)=0;
#define cpu_logical_map(x) (x)
#else
......@@ -135,7 +123,7 @@ extern __inline int hard_smp_processor_id(void)
#define stack_smp_processor_id() \
({ \
struct thread_info *ti; \
__asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (~8191UL)); \
__asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \
ti->cpu; \
})
#endif
......
......@@ -15,7 +15,7 @@ extern int printk(const char * fmt, ...)
typedef struct {
volatile unsigned int lock;
#if CONFIG_DEBUG_SPINLOCK
#ifdef CONFIG_DEBUG_SPINLOCK
unsigned magic;
#endif
} spinlock_t;
......@@ -56,13 +56,56 @@ typedef struct {
/*
* This works. Despite all the confusion.
* (except on PPro SMP or if we are using OOSTORE)
* (PPro errata 66, 92)
*/
#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
#define spin_unlock_string \
"movb $1,%0"
"movb $1,%0" \
:"=m" (lock->lock) : : "memory"
static inline void _raw_spin_unlock(spinlock_t *lock)
{
#ifdef CONFIG_DEBUG_SPINLOCK
if (lock->magic != SPINLOCK_MAGIC)
BUG();
if (!spin_is_locked(lock))
BUG();
#endif
__asm__ __volatile__(
spin_unlock_string
);
}
#else
#define spin_unlock_string \
"xchgb %b0, %1" \
:"=q" (oldval), "=m" (lock->lock) \
:"0" (oldval) : "memory"
static inline void _raw_spin_unlock(spinlock_t *lock)
{
char oldval = 1;
#ifdef CONFIG_DEBUG_SPINLOCK
if (lock->magic != SPINLOCK_MAGIC)
BUG();
if (!spin_is_locked(lock))
BUG();
#endif
__asm__ __volatile__(
spin_unlock_string
);
}
#endif
static inline int _raw_spin_trylock(spinlock_t *lock)
{
signed char oldval;
char oldval;
__asm__ __volatile__(
"xchgb %b0,%1"
:"=q" (oldval), "=m" (lock->lock)
......@@ -85,18 +128,6 @@ printk("eip: %p\n", &&here);
:"=m" (lock->lock) : : "memory");
}
static inline void _raw_spin_unlock(spinlock_t *lock)
{
#ifdef CONFIG_DEBUG_SPINLOCK
if (lock->magic != SPINLOCK_MAGIC)
BUG();
if (!spin_is_locked(lock))
BUG();
#endif
__asm__ __volatile__(
spin_unlock_string
:"=m" (lock->lock) : : "memory");
}
/*
* Read-write spinlocks, allowing multiple readers
......@@ -127,6 +158,8 @@ typedef struct {
#define rwlock_init(x) do { *(x) = RW_LOCK_UNLOCKED; } while(0)
#define rwlock_is_locked(x) ((x)->lock != RW_LOCK_BIAS)
/*
* On x86, we implement read-write locks as a 32-bit counter
* with the high bit (sign) being the "contended" bit.
......@@ -136,9 +169,9 @@ typedef struct {
* Changed to use the same technique as rw semaphores. See
* semaphore.h for details. -ben
*/
/* the spinlock helpers are in arch/x86_64/kernel/semaphore.S */
/* the spinlock helpers are in arch/i386/kernel/semaphore.c */
extern inline void _raw_read_lock(rwlock_t *rw)
static inline void _raw_read_lock(rwlock_t *rw)
{
#ifdef CONFIG_DEBUG_SPINLOCK
if (rw->magic != RWLOCK_MAGIC)
......@@ -168,6 +201,4 @@ static inline int _raw_write_trylock(rwlock_t *lock)
return 0;
}
#define rwlock_is_locked(x) ((x)->lock != RW_LOCK_BIAS)
#endif /* __ASM_SPINLOCK_H */
......@@ -83,7 +83,7 @@ extern void load_gs_index(unsigned);
#define loadsegment(seg,value) \
asm volatile("\n" \
"1:\t" \
"movl %0,%%" #seg "\n" \
"movl %k0,%%" #seg "\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3:\t" \
......@@ -94,7 +94,7 @@ extern void load_gs_index(unsigned);
".align 8\n\t" \
".quad 1b,3b\n" \
".previous" \
: :"r" ((int)(value)))
: :"r" (value))
#define set_debug(value,register) \
__asm__("movq %0,%%db" #register \
......@@ -119,6 +119,13 @@ static inline void write_cr0(unsigned long val)
asm volatile("movq %0,%%cr0" :: "r" (val));
}
static inline unsigned long read_cr3(void)
{
unsigned long cr3;
asm("movq %%cr3,%0" : "=r" (cr3));
return cr3;
}
static inline unsigned long read_cr4(void)
{
unsigned long cr4;
......
......@@ -9,11 +9,8 @@
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
#include <asm/processor.h>
#include <linux/config.h>
#include <asm/pda.h>
#endif
#include <asm/page.h>
#include <asm/types.h>
/*
* low level task data that entry.S needs immediate access to
......@@ -21,6 +18,10 @@
* - this struct shares the supervisor stack pages
*/
#ifndef __ASSEMBLY__
struct task_struct;
struct exec_domain;
#include <asm/mmsegment.h>
struct thread_info {
struct task_struct *task; /* main task structure */
struct exec_domain *exec_domain; /* execution domain */
......@@ -31,7 +32,6 @@ struct thread_info {
mm_segment_t addr_limit;
struct restart_block restart_block;
};
#endif
/*
......@@ -55,27 +55,17 @@ struct thread_info {
#define init_thread_info (init_thread_union.thread_info)
#define init_stack (init_thread_union.stack)
/* how to get the thread information struct from C */
#define THREAD_SIZE (2*PAGE_SIZE)
static inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
ti = (void *)read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE;
return ti;
}
static inline struct thread_info *stack_thread_info(void)
{
struct thread_info *ti;
__asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (~8191UL));
asm("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK));
return ti;
}
/* thread information allocation */
#define alloc_thread_info() ((struct thread_info *) __get_free_pages(GFP_KERNEL,1))
#define free_thread_info(ti) free_pages((unsigned long) (ti), 1)
#define alloc_thread_info() \
((struct thread_info *) __get_free_pages(GFP_KERNEL,THREAD_ORDER))
#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER)
#define get_thread_info(ti) get_task_struct((ti)->task)
#define put_thread_info(ti) put_task_struct((ti)->task)
......@@ -84,7 +74,7 @@ static inline struct thread_info *stack_thread_info(void)
/* how to get the thread information struct from ASM */
/* only works on the process stack. otherwise get it via the PDA. */
#define GET_THREAD_INFO(reg) \
movq $-8192, reg; \
movq $CURRENT_MASK, reg; \
andq %rsp, reg
#endif
......
/*
* linux/include/asm-x8664/timex.h
* linux/include/asm-x86_64/timex.h
*
* x8664 architecture timex specifications
* x86-64 architecture timex specifications
*/
#ifndef _ASMx8664_TIMEX_H
#define _ASMx8664_TIMEX_H
......@@ -16,20 +16,6 @@
(1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \
<< (SHIFT_SCALE-SHIFT_HZ)) / HZ)
/*
* Standard way to access the cycle counter on i586+ CPUs.
* Currently only used on SMP.
*
* If you really have a SMP machine with i486 chips or older,
* compile for that, and this will just always return zero.
* That's ok, it just means that the nicer scheduling heuristics
* won't work for you.
*
* We only use the low 32 bits, and we'd simply better make sure
* that we reschedule before that wraps. Scheduling at least every
* four billion cycles just basically sounds like a good idea,
* regardless of how fast the machine is.
*/
typedef unsigned long long cycles_t;
extern cycles_t cacheflush_time;
......
#ifndef _ASM_X86_64_TOPOLOGY_H
#define _ASM_X86_64_TOPOLOGY_H
#include <linux/config.h>
#ifdef CONFIG_DISCONTIGMEM
/* Map the K8 CPU local memory controllers to a simple 1:1 CPU:NODE topology */
extern int fake_node;
extern unsigned long cpu_online_map;
#define cpu_to_node(cpu) (fake_node ? 0 : (cpu))
#define memblk_to_node(memblk) (fake_node ? 0 : (memblk))
#define parent_node(node) (node)
#define node_to_first_cpu(node) (fake_node ? 0 : (node))
#define node_to_cpu_mask(node) (fake_node ? cpu_online_map : (1UL << (node)))
#define node_to_memblk(node) (node)
#define NODE_BALANCE_RATE 30 /* CHECKME */
#endif
#include <asm-generic/topology.h>
#endif /* _ASM_X86_64_TOPOLOGY_H */
#endif
......@@ -500,8 +500,10 @@ __SYSCALL(__NR_set_tid_address, sys_set_tid_address)
__SYSCALL(__NR_restart_syscall, sys_restart_syscall)
#define __NR_semtimedop 220
__SYSCALL(__NR_semtimedop, sys_semtimedop)
#define __NR_fadvise64 221
__SYSCALL(__NR_fadvise64, sys_fadvise64)
#define __NR_syscall_max __NR_semtimedop
#define __NR_syscall_max __NR_fadvise64
#ifndef __NO_STUBS
/* user-visible error numbers are in the range -1 - -4095 */
......
......@@ -2,6 +2,7 @@
#define _ASM_X86_64_VSYSCALL_H_
#include <linux/time.h>
#include <linux/seqlock.h>
enum vsyscall_num {
__NR_vgettimeofday,
......@@ -19,8 +20,10 @@ enum vsyscall_num {
#define __section_wall_jiffies __attribute__ ((unused, __section__ (".wall_jiffies"), aligned(16)))
#define __section_jiffies __attribute__ ((unused, __section__ (".jiffies"), aligned(16)))
#define __section_sys_tz __attribute__ ((unused, __section__ (".sys_tz"), aligned(16)))
#define __section_sysctl_vsyscall __attribute__ ((unused, __section__ (".sysctl_vsyscall"), aligned(16)))
#define __section_xtime __attribute__ ((unused, __section__ (".xtime"), aligned(16)))
#define __section_vxtime_sequence __attribute__ ((unused, __section__ (".vxtime_sequence"), aligned(16)))
#define __section_xtime_lock __attribute__ ((unused, __section__ (".xtime_lock"), aligned(L1_CACHE_BYTES)))
struct hpet_data {
long address; /* base address */
......@@ -36,21 +39,21 @@ struct hpet_data {
#define hpet_writel(d,a) writel(d, fix_to_virt(FIX_HPET_BASE) + a)
/* vsyscall space (readonly) */
extern long __vxtime_sequence[2];
extern struct hpet_data __hpet;
extern struct timespec __xtime;
extern volatile unsigned long __jiffies;
extern unsigned long __wall_jiffies;
extern struct timezone __sys_tz;
extern seqlock_t __xtime_lock;
/* kernel space (writeable) */
extern long vxtime_sequence[2];
extern struct hpet_data hpet;
extern unsigned long wall_jiffies;
extern struct timezone sys_tz;
extern int sysctl_vsyscall;
extern seqlock_t xtime_lock;
#define vxtime_lock() do { vxtime_sequence[0]++; wmb(); } while(0)
#define vxtime_unlock() do { wmb(); vxtime_sequence[1]++; } while (0)
#define ARCH_HAVE_XTIME_LOCK 1
#endif /* __KERNEL__ */
......
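The linker-script and header changes above replace the open-coded vxtime_sequence with the generic xtime_lock seqlock, which is what lets do_gettimeofday go lockless. A hedged userspace mock of the reader/writer sequence protocol (names mirror linux/seqlock.h; this is a single-threaded sketch, not the kernel implementation):

#include <stdio.h>

typedef struct { unsigned sequence; } seqlock_t;

static void write_seqlock(seqlock_t *sl)   { sl->sequence++; }  /* odd: update in progress */
static void write_sequnlock(seqlock_t *sl) { sl->sequence++; }  /* even: update complete */
static unsigned read_seqbegin(const seqlock_t *sl) { return sl->sequence; }
static int read_seqretry(const seqlock_t *sl, unsigned start)
{
	return (start & 1) || sl->sequence != start;
}

int main(void)
{
	seqlock_t xtime_lock = { 0 };
	long xtime = 0, snapshot;
	unsigned seq;

	write_seqlock(&xtime_lock);
	xtime = 42;                       /* the timer interrupt updates time here */
	write_sequnlock(&xtime_lock);

	do {                              /* lockless reader, as in vgettimeofday */
		seq = read_seqbegin(&xtime_lock);
		snapshot = xtime;
	} while (read_seqretry(&xtime_lock, seq));

	printf("read %ld at sequence %u\n", snapshot, seq);
	return 0;
}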