Commit 7abe2c67 authored by Andi Kleen, committed by Linus Torvalds

[PATCH] x86-64 merge for 2.6.4

The biggest new feature is fixed 32bit vsyscall (SYSCALL and SYSENTER)
support, mostly from Jakub Jelinek.  This greatly improves 32bit syscall
performance (latency is halved or better).  SYSENTER support for Intel
CPUs required some infrastructure changes, but seems to work now too.
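
For context, a minimal user-space sketch (not part of the patch) of how
a 32bit process locates the kernel's __kernel_vsyscall entry through the
ELF auxiliary vector; AT_SYSINFO is the tag passed by the ARCH_DLINFO
change in this patch.  Build with -m32; the auxv walk assumes the usual
stack layout where the auxiliary vector follows the environment block.

#include <elf.h>
#include <stdio.h>

int main(int argc, char **argv, char **envp)
{
    Elf32_auxv_t *auxv;
    char **p = envp;

    (void)argc; (void)argv;
    while (*p)                          /* auxv starts after the env block */
        p++;
    for (auxv = (Elf32_auxv_t *)(p + 1); auxv->a_type != AT_NULL; auxv++)
        if (auxv->a_type == AT_SYSINFO) /* tag 32, per this patch */
            printf("__kernel_vsyscall at %#lx\n",
                   (unsigned long)auxv->a_un.a_val);
    return 0;
}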

The 64bit vsyscall vtime() now just reads xtime.tv_sec.  This should
make it a lot faster too.
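
As a hedged illustration (the page address and slot spacing are
assumptions taken from this kernel generation's fixed vsyscall map,
which is not shown in this diff), a 64bit program can call vtime()
directly; slot 1 sits 0x400 bytes past the 0xffffffffff600000 base:

#include <stdio.h>
#include <time.h>

typedef time_t (*vtime_fn)(time_t *);

int main(void)
{
    /* Sketch only; glibc's time(2) normally wraps this.  No kernel
       entry is needed: vtime() just reads xtime.tv_sec. */
    vtime_fn vtime = (vtime_fn)0xffffffffff600400UL;
    time_t t = vtime(NULL);
    printf("%ld\n", (long)t);
    return 0;
}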

Also included are a fix for some Intel IA32e systems and fixes for a
few long-standing bugs in NMI-like exception handlers.

And a lot of other bug fixes.

Full changelog:
 - Clean up 32bit address space limit handling, fix 3GB personality
 - Move memcpy_{from,to}io export to ksyms.c file. This seems to work
   around a toolchain bug (Andreas Gruenbacher)
 - Update defconfig
 - ACPI merges from i386 (SBF should work now, acpi=strict)
 - Implement mmconfig support based on i386 code (untested)
 - Fix i386/x86-64 pci source file sharing
 - Implement ptrace access for 32bit vsyscall page
 - Always initialize all 32bit SYSENTER/SYSCALL MSRs.
 - Export run time cache line size to generic kernel
 - Remove explicit CPUID in ia32 syscall code
 - Fill in most of boot_cpu_data early
 - Remove unused PER_LINUX32 setup
 - Fix syscall trace in fast 32bit calls (Suresh B. Siddha)
 - Tighten first line of the oops again.
 - Set up ptrace registers correctly for debug, #SS, and double fault exceptions
 - Fix 64bit bug in sys_time64
 - Optimize time syscall/vsyscall to only read xtime
 - Fix csum_partial_copy_nocheck
 - Remove last traces of FPU emulation
 - Check properly for rescheduling in exceptions with own stack
 - Harden exception stack entries (#SS,#NMI,#MC,#DF,#DB) against bogus GS
 - Use an exception stack for machine checks
 - Handle TIF_SINGLESTEP properly in kernel exit
 - Add exception stack for debug handler
 - Disable X86_HT for Opteron optimized builds because it pulls in ACPI_BOOT
 - Fix CONFIG_ACPI_BOOT compilation without CONFIG_ACPI
 - Fix eflags handling in SYSENTER path (Jakub Jelinek)
 - Use atomic counter for enable/disable_hlt (see the sketch after this list)
 - Support 32bit SYSENTER vsyscall too (Jakub Jelinek)
 - Don't redefine Dprintk
 - Change some cpu/apic id arrays to char
 - Support arbitrary cpu<->apicid mappings in hard_smp_processor_id (Suresh B. Siddha)
 - Move K8 erratum #100 workaround into slow path of page fault handler.
 - Fix 32bit cdrom direct access ioctls (Jens Axboe)
 - Enable 32bit vsyscalls by default
 - Fix 32bit vsyscalls (Jakub Jelinek)
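
The enable/disable_hlt entry above deserves a word: a plain int counter
incremented and decremented from multiple CPUs can lose updates, so the
patch switches it to an atomic_t.  A user-space C11 sketch of the same
pattern (illustrative only, not the kernel code; names mirror the kernel
functions changed in the diff below):

#include <stdatomic.h>

static atomic_int hlt_counter = 0;

void disable_hlt(void) { atomic_fetch_add(&hlt_counter, 1); }
void enable_hlt(void)  { atomic_fetch_sub(&hlt_counter, 1); }

/* the idle loop may only halt when nobody has vetoed it */
int hlt_allowed(void)  { return atomic_load(&hlt_counter) == 0; }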
parent 626942a4
......@@ -160,9 +160,10 @@ config X86_CPUID
with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
/dev/cpu/31/cpuid.
# disable it for opteron optimized builds because it pulls in ACPI_BOOT
config X86_HT
bool
depends on SMP
depends on SMP && !MK8
default y
config MATH_EMULATION
......@@ -330,6 +331,11 @@ config PCI_DIRECT
depends on PCI
default y
config PCI_MMCONFIG
bool "Support mmconfig PCI config space access"
depends on PCI
select ACPI_BOOT
# the drivers/pci/msi.c code needs to be fixed first before enabling
config PCI_USE_VECTOR
bool "Vector-based interrupt indexing"
......
......@@ -63,7 +63,7 @@ head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kern
libs-y += arch/x86_64/lib/
core-y += arch/x86_64/kernel/ arch/x86_64/mm/
core-$(CONFIG_IA32_EMULATION) += arch/x86_64/ia32/
drivers-$(CONFIG_PCI) += arch/i386/pci/
drivers-$(CONFIG_PCI) += arch/x86_64/pci/
drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/
boot := arch/x86_64/boot
......
......@@ -11,18 +11,22 @@ obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
$(obj)/syscall32.o: $(src)/syscall32.c $(obj)/vsyscall.so
$(obj)/syscall32.o: $(src)/syscall32.c \
$(foreach F,sysenter syscall,$(obj)/vsyscall-$F.so)
# Teach kbuild about targets
targets := vsyscall.o vsyscall.so
targets := $(foreach F,sysenter syscall,vsyscall-$F.o vsyscall-$F.so)
# The DSO images are built using a special linker script
quiet_cmd_vsyscall = SYSCALL $@
cmd_vsyscall = $(CC) -m32 -nostdlib -shared -s \
quiet_cmd_syscall = SYSCALL $@
cmd_syscall = $(CC) -m32 -nostdlib -shared -s \
-Wl,-soname=linux-gate.so.1 -o $@ \
-Wl,-T,$(filter-out FORCE,$^)
$(obj)/vsyscall.so: $(src)/vsyscall.lds $(obj)/vsyscall.o FORCE
$(call if_changed,vsyscall)
AFLAGS_vsyscall.o = -m32
$(obj)/vsyscall-sysenter.so $(obj)/vsyscall-syscall.so: \
$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
$(call if_changed,syscall)
AFLAGS_vsyscall-sysenter.o = -m32
AFLAGS_vsyscall-syscall.o = -m32
CFLAGS_ia32_ioctl.o += -Ifs/
......@@ -32,7 +32,7 @@
#define AT_SYSINFO 32
#define AT_SYSINFO_EHDR 33
int sysctl_vsyscall32;
int sysctl_vsyscall32 = 1;
#define ARCH_DLINFO do { \
if (sysctl_vsyscall32) { \
......@@ -46,7 +46,7 @@ struct elf_phdr;
#define IA32_EMULATOR 1
#define ELF_ET_DYN_BASE (IA32_PAGE_OFFSET/3 + 0x1000000)
#define ELF_ET_DYN_BASE (TASK_UNMAPPED_32 + 0x1000000)
#undef ELF_ARCH
#define ELF_ARCH EM_386
......@@ -261,7 +261,6 @@ do { \
set_thread_flag(TIF_ABI_PENDING); \
else \
clear_thread_flag(TIF_ABI_PENDING); \
set_personality((ibcs2)?PER_SVR4:current->personality); \
} while (0)
/* Override some function names */
......
......@@ -273,8 +273,6 @@ asmlinkage long sys32_sigreturn(struct pt_regs regs)
sigset_t set;
unsigned int eax;
set_thread_flag(TIF_IRET);
if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__get_user(set.sig[0], &frame->sc.oldmask)
......@@ -305,8 +303,6 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs regs)
stack_t st;
unsigned int eax;
set_thread_flag(TIF_IRET);
if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
......
......@@ -12,6 +12,7 @@
#include <asm/ia32_unistd.h>
#include <asm/thread_info.h>
#include <asm/segment.h>
#include <asm/vsyscall32.h>
#include <linux/linkage.h>
.macro IA32_ARG_FIXUP noebp=0
......@@ -25,6 +26,99 @@
movl %edx,%edx /* zero extension */
.endm
/* clobbers %eax */
.macro CLEAR_RREGS
xorl %eax,%eax
movq %rax,R11(%rsp)
movq %rax,R10(%rsp)
movq %rax,R9(%rsp)
movq %rax,R8(%rsp)
.endm
/*
* 32bit SYSENTER instruction entry.
*
* Arguments:
* %eax System call number.
* %ebx Arg1
* %ecx Arg2
* %edx Arg3
* %esi Arg4
* %edi Arg5
* %ebp user stack
* 0(%ebp) Arg6
*
* Interrupts off.
*
* This is purely a fast path. For anything complicated we use the int 0x80
* path below. Set up a complete hardware stack frame to share code
* with the int 0x80 path.
*/
ENTRY(ia32_sysenter_target)
CFI_STARTPROC
swapgs
movq %gs:pda_kernelstack, %rsp
addq $(PDA_STACKOFFSET),%rsp
sti
movl %ebp,%ebp /* zero extension */
pushq $__USER32_DS
pushq %rbp
pushfq
movl $VSYSCALL32_SYSEXIT, %r10d
pushq $__USER32_CS
movl %eax, %eax
pushq %r10
pushq %rax
cld
SAVE_ARGS 0,0,1
/* no need to do an access_ok check here because rbp has been
32bit zero extended */
1: movl (%rbp),%r9d
.section __ex_table,"a"
.quad 1b,ia32_badarg
.previous
GET_THREAD_INFO(%r10)
bt $TIF_SYSCALL_TRACE,threadinfo_flags(%r10)
jc sysenter_tracesys
sysenter_do_call:
cmpl $(IA32_NR_syscalls),%eax
jae ia32_badsys
IA32_ARG_FIXUP 1
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
GET_THREAD_INFO(%r10)
cli
testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
jnz int_ret_from_sys_call
/* clear IF so that popfq doesn't enable interrupts early */
andl $~0x200,EFLAGS-R11(%rsp)
RESTORE_ARGS 1,24,1,1,1,1
popfq
popq %rcx /* User %esp */
movl $VSYSCALL32_SYSEXIT,%edx /* User %eip */
swapgs
sti /* sti only takes effect after the next instruction */
/* sysexit */
.byte 0xf, 0x35
sysenter_tracesys:
SAVE_REST
CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp) /* really needed? */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace
LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
movl %ebp, %ebp
/* no need to do an access_ok check here because rbp has been
32bit zero extended */
1: movl (%rbp),%r9d
.section __ex_table,"a"
.quad 1b,ia32_badarg
.previous
jmp sysenter_do_call
CFI_ENDPROC
/*
* 32bit SYSCALL instruction entry.
*
......@@ -51,7 +145,7 @@ ENTRY(ia32_cstar_target)
movl %esp,%r8d
movq %gs:pda_kernelstack,%rsp
sti
SAVE_ARGS 8,1
SAVE_ARGS 8,1,1
movl %eax,%eax /* zero extension */
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp)
......@@ -66,47 +160,48 @@ ENTRY(ia32_cstar_target)
/* hardware stack frame is complete now */
1: movl (%r8),%r9d
.section __ex_table,"a"
.quad 1b,cstar_badarg
.quad 1b,ia32_badarg
.previous
GET_THREAD_INFO(%r10)
bt $TIF_SYSCALL_TRACE,threadinfo_flags(%r10)
jc ia32_tracesys
jc cstar_tracesys
cstar_do_call:
cmpl $IA32_NR_syscalls,%eax
jae ia32_badsys
IA32_ARG_FIXUP 1
call *ia32_sys_call_table(,%rax,8)
.globl cstar_sysret
/* label must directly follow call */
cstar_sysret:
movq %rax,RAX-ARGOFFSET(%rsp)
GET_THREAD_INFO(%r10)
cli
testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
jnz 1f
RESTORE_ARGS 1,-ARG_SKIP,1,1
jnz int_ret_from_sys_call
RESTORE_ARGS 1,-ARG_SKIP,1,1,1
movl RIP-ARGOFFSET(%rsp),%ecx
movl EFLAGS-ARGOFFSET(%rsp),%r11d
movl RSP-ARGOFFSET(%rsp),%esp
swapgs
sysretl
1:
btc $TIF_IRET,threadinfo_flags(%r10)
jmp int_ret_from_sys_call
cstar_tracesys:
SAVE_REST
CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp) /* really needed? */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace
LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
movl RSP-ARGOFFSET(%rsp), %r8d
/* no need to do an access_ok check here because r8 has been
32bit zero extended */
1: movl (%r8),%r9d
.section __ex_table,"a"
.quad 1b,ia32_badarg
.previous
jmp cstar_do_call
cstar_badarg:
ia32_badarg:
movq $-EFAULT,%rax
jmp cstar_sysret
jmp ia32_sysret
CFI_ENDPROC
/*
......@@ -139,7 +234,7 @@ ENTRY(ia32_syscall)
cld
/* note the registers are not zero extended to the stack frame.
this could be a problem. */
SAVE_ARGS
SAVE_ARGS 0,0,1
GET_THREAD_INFO(%r10)
bt $TIF_SYSCALL_TRACE,threadinfo_flags(%r10)
jc ia32_tracesys
......@@ -148,6 +243,7 @@ ia32_do_syscall:
jae ia32_badsys
IA32_ARG_FIXUP
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
ia32_sysret:
movq %rax,RAX-ARGOFFSET(%rsp)
jmp int_ret_from_sys_call
......@@ -200,8 +296,7 @@ ENTRY(ia32_ptregs_common)
call *%rax
movq %r15, %r11
RESTORE_REST
cmpq $cstar_sysret,%r11
je int_ret_from_sys_call /* misbalances the call/ret stack. sorry */
leaq ia32_sysret(%rip),%r11
pushq %r11
ret
CFI_ENDPROC
......
......@@ -1876,18 +1876,9 @@ long sys32_quotactl(void)
cond_syscall(sys32_ipc)
struct exec_domain ia32_exec_domain = {
.name = "linux/x86",
.pers_low = PER_LINUX32,
.pers_high = PER_LINUX32,
};
static int __init ia32_init (void)
{
printk("IA32 emulation $Id: sys_ia32.c,v 1.32 2002/03/24 13:02:28 ak Exp $\n");
ia32_exec_domain.signal_map = default_exec_domain.signal_map;
ia32_exec_domain.signal_invmap = default_exec_domain.signal_invmap;
register_exec_domain(&ia32_exec_domain);
return 0;
}
......
......@@ -13,16 +13,22 @@
#include <asm/tlbflush.h>
#include <asm/ia32_unistd.h>
/* 32bit VDSO mapped into user space. */
/* 32bit VDSOs mapped into user space. */
asm(".section \".init.data\",\"aw\"\n"
"syscall32:\n"
".incbin \"arch/x86_64/ia32/vsyscall.so\"\n"
"syscall32_end:\n"
"syscall32_syscall:\n"
".incbin \"arch/x86_64/ia32/vsyscall-syscall.so\"\n"
"syscall32_syscall_end:\n"
"syscall32_sysenter:\n"
".incbin \"arch/x86_64/ia32/vsyscall-sysenter.so\"\n"
"syscall32_sysenter_end:\n"
".previous");
extern unsigned char syscall32[], syscall32_end[];
extern unsigned char syscall32_syscall[], syscall32_syscall_end[];
extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
extern int sysctl_vsyscall32;
char *syscall32_page;
static int use_sysenter __initdata = -1;
/* RED-PEN: This knows too much about high level VM */
/* Alternative would be to generate a vma with appropriate backing options
......@@ -58,8 +64,28 @@ static int __init init_syscall32(void)
if (!syscall32_page)
panic("Cannot allocate syscall32 page");
SetPageReserved(virt_to_page(syscall32_page));
memcpy(syscall32_page, syscall32, syscall32_end - syscall32);
if (use_sysenter > 0) {
memcpy(syscall32_page, syscall32_sysenter,
syscall32_sysenter_end - syscall32_sysenter);
} else {
memcpy(syscall32_page, syscall32_syscall,
syscall32_syscall_end - syscall32_syscall);
}
return 0;
}
__initcall(init_syscall32);
void __init syscall32_cpu_init(void)
{
if (use_sysenter < 0)
use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
/* Load these always in case some future AMD CPU supports
SYSENTER from compat mode too. */
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
wrmsr(MSR_IA32_SYSENTER_ESP, 0, 0);
wrmsrl(MSR_IA32_SYSENTER_EIP, ia32_sysenter_target);
wrmsrl(MSR_CSTAR, ia32_cstar_target);
}
/*
* Code for the vsyscall page. This version uses the syscall instruction.
* Common code for the sigreturn entry points on the vsyscall page.
* This code uses SYSCALL_ENTER_KERNEL (either syscall or int $0x80)
* to enter the kernel.
* This file is #include'd by vsyscall-*.S to define them after the
* vsyscall entry point. The addresses we get for these entry points
* by doing ".balign 32" must match in both versions of the page.
*/
#include <asm/ia32_unistd.h>
#include <asm/offset.h>
.text
.section .text.vsyscall,"ax"
.globl __kernel_vsyscall
.type __kernel_vsyscall,@function
__kernel_vsyscall:
.LSTART_vsyscall:
push %ebp
.Lpush_ebp:
movl %ecx, %ebp
syscall
popl %ebp
.Lpop_ebp:
ret
.LEND_vsyscall:
.size __kernel_vsyscall,.-.LSTART_vsyscall
.section .text.sigreturn,"ax"
.balign 32
.globl __kernel_sigreturn
......@@ -29,7 +15,7 @@ __kernel_sigreturn:
.LSTART_sigreturn:
popl %eax
movl $__NR_ia32_sigreturn, %eax
syscall
SYSCALL_ENTER_KERNEL
.LEND_sigreturn:
.size __kernel_sigreturn,.-.LSTART_sigreturn
......@@ -40,49 +26,11 @@ __kernel_sigreturn:
__kernel_rt_sigreturn:
.LSTART_rt_sigreturn:
movl $__NR_ia32_rt_sigreturn, %eax
syscall
SYSCALL_ENTER_KERNEL
.LEND_rt_sigreturn:
.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
.section .eh_frame,"a",@progbits
.LSTARTFRAME:
.long .LENDCIE-.LSTARTCIE
.LSTARTCIE:
.long 0 /* CIE ID */
.byte 1 /* Version number */
.string "zR" /* NUL-terminated augmentation string */
.uleb128 1 /* Code alignment factor */
.sleb128 -4 /* Data alignment factor */
.byte 8 /* Return address register column */
.uleb128 1 /* Augmentation value length */
.byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
.byte 0x0c /* DW_CFA_def_cfa */
.uleb128 4
.uleb128 4
.byte 0x88 /* DW_CFA_offset, column 0x8 */
.uleb128 1
.align 4
.LENDCIE:
.long .LENDFDE1-.LSTARTFDE1 /* Length FDE */
.LSTARTFDE1:
.long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */
.long .LSTART_vsyscall-. /* PC-relative start address */
.long .LEND_vsyscall-.LSTART_vsyscall
.uleb128 0 /* Augmentation length */
/* What follows are the instructions for the table generation.
We have to record all changes of the stack pointer. */
.byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.uleb128 8
.byte 0x85, 0x02 /* DW_CFA_offset %ebp -8 */
.byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */
.byte 0xc5 /* DW_CFA_restore %ebp */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.uleb128 4
.align 4
.LENDFDE1:
.long .LENDFDE2-.LSTARTFDE2 /* Length FDE */
.LSTARTFDE2:
.long .LSTARTFDE2-.LSTARTFRAME /* CIE pointer */
......
/*
* Code for the vsyscall page. This version uses the syscall instruction.
*/
#include <asm/ia32_unistd.h>
#include <asm/offset.h>
#include <asm/segment.h>
.text
.section .text.vsyscall,"ax"
.globl __kernel_vsyscall
.type __kernel_vsyscall,@function
__kernel_vsyscall:
.LSTART_vsyscall:
push %ebp
.Lpush_ebp:
movl %ecx, %ebp
syscall
movl $__USER32_DS, %ecx
movl %ecx, %ss
movl %ebp, %ecx
popl %ebp
.Lpop_ebp:
ret
.LEND_vsyscall:
.size __kernel_vsyscall,.-.LSTART_vsyscall
.section .eh_frame,"a",@progbits
.LSTARTFRAME:
.long .LENDCIE-.LSTARTCIE
.LSTARTCIE:
.long 0 /* CIE ID */
.byte 1 /* Version number */
.string "zR" /* NUL-terminated augmentation string */
.uleb128 1 /* Code alignment factor */
.sleb128 -4 /* Data alignment factor */
.byte 8 /* Return address register column */
.uleb128 1 /* Augmentation value length */
.byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
.byte 0x0c /* DW_CFA_def_cfa */
.uleb128 4
.uleb128 4
.byte 0x88 /* DW_CFA_offset, column 0x8 */
.uleb128 1
.align 4
.LENDCIE:
.long .LENDFDE1-.LSTARTFDE1 /* Length FDE */
.LSTARTFDE1:
.long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */
.long .LSTART_vsyscall-. /* PC-relative start address */
.long .LEND_vsyscall-.LSTART_vsyscall
.uleb128 0 /* Augmentation length */
/* What follows are the instructions for the table generation.
We have to record all changes of the stack pointer. */
.byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.uleb128 8
.byte 0x85, 0x02 /* DW_CFA_offset %ebp -8 */
.byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */
.byte 0xc5 /* DW_CFA_restore %ebp */
.byte 0x0e /* DW_CFA_def_cfa_offset */
.uleb128 4
.align 4
.LENDFDE1:
#define SYSCALL_ENTER_KERNEL syscall
#include "vsyscall-sigreturn.S"
/*
* Code for the vsyscall page. This version uses the sysenter instruction.
*/
#include <asm/ia32_unistd.h>
#include <asm/offset.h>
.text
.section .text.vsyscall,"ax"
.globl __kernel_vsyscall
.type __kernel_vsyscall,@function
__kernel_vsyscall:
.LSTART_vsyscall:
push %ecx
.Lpush_ecx:
push %edx
.Lpush_edx:
push %ebp
.Lenter_kernel:
movl %esp,%ebp
sysenter
.space 7,0x90
jmp .Lenter_kernel
/* 16: System call normal return point is here! */
pop %ebp
.Lpop_ebp:
pop %edx
.Lpop_edx:
pop %ecx
.Lpop_ecx:
ret
.LEND_vsyscall:
.size __kernel_vsyscall,.-.LSTART_vsyscall
.section .eh_frame,"a",@progbits
.LSTARTFRAME:
.long .LENDCIE-.LSTARTCIE
.LSTARTCIE:
.long 0 /* CIE ID */
.byte 1 /* Version number */
.string "zR" /* NUL-terminated augmentation string */
.uleb128 1 /* Code alignment factor */
.sleb128 -4 /* Data alignment factor */
.byte 8 /* Return address register column */
.uleb128 1 /* Augmentation value length */
.byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
.byte 0x0c /* DW_CFA_def_cfa */
.uleb128 4
.uleb128 4
.byte 0x88 /* DW_CFA_offset, column 0x8 */
.uleb128 1
.align 4
.LENDCIE:
.long .LENDFDE1-.LSTARTFDE1 /* Length FDE */
.LSTARTFDE1:
.long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */
.long .LSTART_vsyscall-. /* PC-relative start address */
.long .LEND_vsyscall-.LSTART_vsyscall
.uleb128 0 /* Augmentation length */
/* What follows are the instructions for the table generation.
We have to record all changes of the stack pointer. */
.byte 0x04 /* DW_CFA_advance_loc4 */
.long .Lpush_ecx-.LSTART_vsyscall
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x08 /* RA at offset 8 now */
.byte 0x04 /* DW_CFA_advance_loc4 */
.long .Lpush_edx-.Lpush_ecx
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x0c /* RA at offset 12 now */
.byte 0x04 /* DW_CFA_advance_loc4 */
.long .Lenter_kernel-.Lpush_edx
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x10 /* RA at offset 16 now */
.byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */
/* Finally the epilogue. */
.byte 0x04 /* DW_CFA_advance_loc4 */
.long .Lpop_ebp-.Lenter_kernel
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x12 /* RA at offset 12 now */
.byte 0xc5 /* DW_CFA_restore %ebp */
.byte 0x04 /* DW_CFA_advance_loc4 */
.long .Lpop_edx-.Lpop_ebp
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x08 /* RA at offset 8 now */
.byte 0x04 /* DW_CFA_advance_loc4 */
.long .Lpop_ecx-.Lpop_edx
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x04 /* RA at offset 4 now */
.align 4
.LENDFDE1:
#define SYSCALL_ENTER_KERNEL int $0x80
#include "vsyscall-sigreturn.S"
......@@ -8,10 +8,9 @@ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_x86_64.o \
x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bootflag.o e820.o reboot.o warmreboot.o
obj-y += mce.o
obj-y += mce.o acpi/
obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/
obj-$(CONFIG_ACPI) += acpi/
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
......
......@@ -78,6 +78,31 @@ __acpi_map_table (
return NULL;
}
#ifdef CONFIG_PCI_MMCONFIG
static int __init acpi_parse_mcfg(unsigned long phys_addr, unsigned long size)
{
struct acpi_table_mcfg *mcfg;
if (!phys_addr || !size)
return -EINVAL;
mcfg = (struct acpi_table_mcfg *) __acpi_map_table(phys_addr, size);
if (!mcfg) {
printk(KERN_WARNING PREFIX "Unable to map MCFG\n");
return -ENODEV;
}
if (mcfg->base_reserved) {
printk(KERN_ERR PREFIX "MMCONFIG not in low 4GB of memory\n");
return -ENODEV;
}
pci_mmcfg_base_addr = mcfg->base_address;
return 0;
}
#endif /* CONFIG_PCI_MMCONFIG */
#ifdef CONFIG_X86_LOCAL_APIC
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
......@@ -234,6 +259,24 @@ acpi_parse_nmi_src (
#endif /*CONFIG_X86_IO_APIC*/
static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
{
struct acpi_table_sbf *sb;
if (!phys_addr || !size)
return -EINVAL;
sb = (struct acpi_table_sbf *) __acpi_map_table(phys_addr, size);
if (!sb) {
printk(KERN_WARNING PREFIX "Unable to map SBF\n");
return -ENODEV;
}
sbf_port = sb->sbf_cmos; /* Save CMOS port */
return 0;
}
#ifdef CONFIG_HPET_TIMER
static int __init
acpi_parse_hpet (
......@@ -404,6 +447,8 @@ acpi_boot_init (void)
return result;
}
(void) acpi_table_parse(ACPI_BOOT, acpi_parse_sbf);
result = acpi_blacklisted();
if (result) {
printk(KERN_WARNING PREFIX "BIOS listed in blacklist, disabling ACPI support\n");
......@@ -550,6 +595,12 @@ acpi_boot_init (void)
printk("ACPI: no HPET table found (%d).\n", result);
#endif
#ifdef CONFIG_PCI_MMCONFIG
result = acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg);
if (result)
printk(KERN_ERR PREFIX "Error %d parsing MCFG\n", result);
#endif
return 0;
}
......
......@@ -226,7 +226,7 @@ sysret_careful:
/* Handle a signal */
sysret_signal:
sti
testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME),%edx
testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
jz 1f
/* Really a signal */
......@@ -307,7 +307,7 @@ int_very_careful:
jmp int_restore_rest
int_signal:
testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING),%edx
testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
jz 1f
movq %rsp,%rdi # &ptregs -> arg1
xorl %esi,%esi # oldset -> arg2
......@@ -489,7 +489,7 @@ retint_careful:
jmp retint_check
retint_signal:
testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME),%edx
testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
jz retint_swapgs
sti
SAVE_REST
......@@ -572,6 +572,24 @@ ENTRY(spurious_interrupt)
jmp error_entry
.endm
/* error code is on the stack already */
/* handle NMI-like exceptions that can happen everywhere */
.macro paranoidentry sym
SAVE_ALL
cld
movl $1,%ebx
movl $MSR_GS_BASE,%ecx
rdmsr
testl %edx,%edx
js 1f
swapgs
xorl %ebx,%ebx
1: movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi
movq $-1,ORIG_RAX(%rsp)
call \sym
.endm
/*
* Exception entry point. This expects an error code/orig_rax on the stack
* and the exception handler in %rax.
......@@ -625,6 +643,7 @@ error_sti:
movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp)
call *%rax
/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
error_exit:
movl %ebx,%eax
RESTORE_REST
......@@ -776,48 +795,59 @@ ENTRY(simd_coprocessor_error)
zeroentry do_simd_coprocessor_error
ENTRY(device_not_available)
CFI_STARTPROC
pushq $-1 #error code
SAVE_ALL
movl $1,%ebx
testl $3,CS(%rsp)
je 1f
xorl %ebx,%ebx
swapgs
1: movq %cr0,%rax
leaq math_state_restore(%rip),%rcx
leaq math_emulate(%rip),%rdx
testl $0x4,%eax
cmoveq %rcx,%rdx
call *%rdx
jmp error_exit
CFI_ENDPROC
zeroentry math_state_restore
/* runs on exception stack */
ENTRY(debug)
zeroentry do_debug
CFI_STARTPROC
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_debug
paranoid_stack_switch:
testq %rax,%rax
jz paranoid_exit
/* switch back to process stack to restore the state ptrace touched */
movq %rax,%rsp
jmp paranoid_exit
CFI_ENDPROC
/* runs on exception stack */
ENTRY(nmi)
CFI_STARTPROC
pushq $-1
SAVE_ALL
/* NMI could happen inside the critical section of a swapgs,
so it is needed to use this expensive way to check. */
movl $MSR_GS_BASE,%ecx
rdmsr
xorl %ebx,%ebx
testl %edx,%edx
js 1f
swapgs
movl $1,%ebx
1: movq %rsp,%rdi # regs -> arg1
call do_nmi
/* XXX: should do preemption checks here */
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_nmi
/* ebx: no swapgs flag */
paranoid_exit:
testl $3,CS(%rsp)
jnz paranoid_userspace
testl %ebx,%ebx /* swapgs needed? */
jnz paranoid_restore
paranoid_swapgs:
cli
testl %ebx,%ebx
jz 2f
swapgs
2: RESTORE_ALL 8
paranoid_restore:
RESTORE_ALL 8
iretq
paranoid_userspace:
cli
GET_THREAD_INFO(%rcx)
movl threadinfo_flags(%rcx),%edx
testl $_TIF_NEED_RESCHED,%edx
jnz paranoid_resched
testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
jnz paranoid_signal
jmp paranoid_swapgs
paranoid_resched:
sti
call schedule
jmp paranoid_exit
paranoid_signal:
sti
xorl %esi,%esi /* oldset */
movq %rsp,%rdi /* &pt_regs */
call do_notify_resume
jmp paranoid_exit
CFI_ENDPROC
ENTRY(int3)
......@@ -838,8 +868,10 @@ ENTRY(coprocessor_segment_overrun)
ENTRY(reserved)
zeroentry do_reserved
/* runs on exception stack */
ENTRY(double_fault)
errorentry do_double_fault
paranoidentry do_double_fault
jmp paranoid_stack_switch
ENTRY(invalid_TSS)
errorentry do_invalid_TSS
......@@ -847,8 +879,10 @@ ENTRY(invalid_TSS)
ENTRY(segment_not_present)
errorentry do_segment_not_present
/* runs on exception stack */
ENTRY(stack_segment)
errorentry do_stack_segment
paranoidentry do_stack_segment
jmp paranoid_stack_switch
ENTRY(general_protection)
errorentry do_general_protection
......@@ -862,8 +896,14 @@ ENTRY(divide_error)
ENTRY(spurious_interrupt_bug)
zeroentry do_spurious_interrupt_bug
/* runs on exception stack */
ENTRY(machine_check)
zeroentry do_machine_check
CFI_STARTPROC
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_machine_check
jmp paranoid_exit
CFI_ENDPROC
ENTRY(call_debug)
zeroentry do_call_debug
......
......@@ -95,7 +95,5 @@ asmlinkage long sys_iopl(unsigned int level, struct pt_regs regs)
return -EPERM;
}
regs.eflags = (regs.eflags &~ 0x3000UL) | (level << 12);
/* Make sure we return the long way (not sysenter) */
set_thread_flag(TIF_IRET);
return 0;
}
......@@ -880,6 +880,7 @@ extern FADT_DESCRIPTOR acpi_fadt;
void __init mp_config_ioapic_for_sci(int irq)
{
#ifdef CONFIG_ACPI_INTERPRETER
int ioapic;
int ioapic_pin;
struct acpi_table_madt *madt;
......@@ -939,6 +940,7 @@ void __init mp_config_ioapic_for_sci(int irq)
*/
io_apic_set_pci_routing(ioapic, ioapic_pin, irq,
(flags.trigger == 1 ? 0 : 1), (flags.polarity == 1 ? 0 : 1));
#endif
}
#ifdef CONFIG_ACPI_PCI
......
......@@ -50,6 +50,12 @@ int force_iommu = 0;
#endif
int iommu_merge = 0;
int iommu_sac_force = 0;
/* If this is disabled the IOMMU will use an optimized flushing strategy
of flushing only when a mapping is reused. When it is true the GART is
flushed for every mapping. The problem is that the lazy flush seems to
trigger bugs with some popular PCI cards, in particular 3ware (but it
has also been seen with Qlogic at least). */
int iommu_fullflush = 1;
#define MAX_NB 8
......
......@@ -53,7 +53,7 @@ asmlinkage extern void ret_from_fork(void);
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
int hlt_counter;
atomic_t hlt_counter = ATOMIC_INIT(0);
/*
* Powermanagement idle function, if any..
......@@ -62,14 +62,14 @@ void (*pm_idle)(void);
void disable_hlt(void)
{
hlt_counter++;
atomic_inc(&hlt_counter);
}
EXPORT_SYMBOL(disable_hlt);
void enable_hlt(void)
{
hlt_counter--;
atomic_dec(&hlt_counter);
}
EXPORT_SYMBOL(enable_hlt);
......@@ -80,7 +80,7 @@ EXPORT_SYMBOL(enable_hlt);
*/
void default_idle(void)
{
if (!hlt_counter) {
if (!atomic_read(&hlt_counter)) {
local_irq_disable();
if (!need_resched())
safe_halt();
......
......@@ -218,6 +218,11 @@ static __init void parse_cmdline_early (char ** cmdline_p)
if (!memcmp(from, "acpi=ht", 7)) {
acpi_ht = 1;
}
/* acpi=strict disables out-of-spec workarounds */
else if (!memcmp(from, "acpi=strict", 11)) {
acpi_strict = 1;
}
#endif
if (!memcmp(from, "nolapic", 7) ||
......@@ -793,13 +798,12 @@ struct cpu_model_info {
char *model_names[16];
};
/*
* This does the hard work of actually picking apart the CPU stuff...
*/
void __init identify_cpu(struct cpuinfo_x86 *c)
/* Do some early cpuid on the boot CPU to get some parameters that are
needed before check_bugs. Everything advanced is in identify_cpu
below. */
void __init early_identify_cpu(struct cpuinfo_x86 *c)
{
int i;
u32 xlvl, tfms;
u32 tfms;
c->loops_per_jiffy = loops_per_jiffy;
c->x86_cache_size = -1;
......@@ -807,6 +811,7 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
c->x86_model = c->x86_mask = 0; /* So far unknown... */
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
c->x86_clflush_size = 64;
memset(&c->x86_capability, 0, sizeof c->x86_capability);
/* Get vendor name */
......@@ -816,6 +821,7 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
(int *)&c->x86_vendor_id[4]);
get_cpu_vendor(c);
/* Initialize the standard set of capabilities */
/* Note that the vendor-specific code below might override */
......@@ -837,6 +843,17 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
/* Have CPUID level 0 only - unheard of */
c->x86 = 4;
}
}
/*
* This does the hard work of actually picking apart the CPU stuff...
*/
void __init identify_cpu(struct cpuinfo_x86 *c)
{
int i;
u32 xlvl;
early_identify_cpu(c);
/* AMD-defined flags: level 0x80000001 */
xlvl = cpuid_eax(0x80000000);
......@@ -854,7 +871,6 @@ void __init identify_cpu(struct cpuinfo_x86 *c)
c->x86_capability[2] = cpuid_edx(0x80860001);
}
/*
* Vendor-specific initialization. In this section we
* canonicalize the feature flags, meaning if there are
......
......@@ -202,7 +202,7 @@ void __init syscall_init(void)
wrmsrl(MSR_LSTAR, system_call);
#ifdef CONFIG_IA32_EMULATION
wrmsrl(MSR_CSTAR, ia32_cstar_target);
syscall32_cpu_init ();
#endif
/* Flags to clear on syscall */
......@@ -274,6 +274,9 @@ void __init cpu_init (void)
asm volatile("pushfq ; popq %%rax ; btr $14,%%rax ; pushq %%rax ; popfq" ::: "eax");
if (cpu == 0)
early_identify_cpu(&boot_cpu_data);
syscall_init();
wrmsrl(MSR_FS_BASE, 0);
......@@ -287,7 +290,8 @@ void __init cpu_init (void)
*/
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
if (cpu) {
estacks = (char *)__get_free_pages(GFP_ATOMIC, 0);
estacks = (char *)__get_free_pages(GFP_ATOMIC,
EXCEPTION_STACK_ORDER);
if (!estacks)
panic("Cannot allocate exception stack %ld %d\n",
v, cpu);
......
......@@ -55,11 +55,16 @@
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
char phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
/* Bitmask of currently online CPUs */
cpumask_t cpu_online_map;
/* which CPU (physical APIC ID) maps to which logical CPU number */
volatile char x86_apicid_to_cpu[NR_CPUS];
/* which logical CPU number maps to which CPU (physical APIC ID) */
volatile char x86_cpu_to_apicid[NR_CPUS];
static cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
static cpumask_t smp_commenced_mask;
......@@ -70,7 +75,7 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
/* Set when the idlers are all forked */
int smp_threads_ready;
int cpu_sibling_map[NR_CPUS] __cacheline_aligned;
char cpu_sibling_map[NR_CPUS] __cacheline_aligned;
/*
* Trampoline 80x86 program as an array.
......@@ -574,6 +579,9 @@ static void __init do_boot_cpu (int apicid)
if (IS_ERR(idle))
panic("failed fork for CPU %d", cpu);
wake_up_forked_process(idle);
x86_cpu_to_apicid[cpu] = apicid;
x86_apicid_to_cpu[apicid] = cpu;
/*
* We remove it from the pidhash and the runqueue
......@@ -885,7 +893,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
break;
}
}
if (cpu_sibling_map[cpu] == NO_PROC_ID) {
if (cpu_sibling_map[cpu] == (char)NO_PROC_ID) {
smp_num_siblings = 1;
printk(KERN_WARNING "WARNING: No sibling found for CPU %d.\n", cpu);
}
......
......@@ -351,24 +351,19 @@ void oops_end(void)
void __die(const char * str, struct pt_regs * regs, long err)
{
int nl = 0;
static int die_counter;
printk(KERN_EMERG "%s: %04lx [%u]\n", str, err & 0xffff,++die_counter);
notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter);
#ifdef CONFIG_PREEMPT
printk("PREEMPT ");
nl = 1;
#endif
#ifdef CONFIG_SMP
printk("SMP ");
nl = 1;
#endif
#ifdef CONFIG_DEBUG_PAGEALLOC
printk("DEBUG_PAGEALLOC");
nl = 1;
#endif
if (nl)
printk("\n");
notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV);
show_registers(regs);
/* Executive summary in case the oops scrolled away */
printk("RIP ");
......@@ -475,14 +470,27 @@ DO_ERROR( 4, SIGSEGV, "overflow", overflow)
DO_ERROR( 5, SIGSEGV, "bounds", bounds)
DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->rip)
DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
DO_ERROR( 8, SIGSEGV, "double fault", double_fault)
DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, get_cr2())
DO_ERROR(18, SIGSEGV, "reserved", reserved)
#define DO_ERROR_STACK(trapnr, signr, str, name) \
asmlinkage unsigned long do_##name(struct pt_regs * regs, long error_code) \
{ \
struct pt_regs *pr = ((struct pt_regs *)(current->thread.rsp0))-1; \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) == NOTIFY_BAD) \
return 0; \
if (regs->cs & 3) \
memcpy(pr, regs, sizeof(struct pt_regs)); \
do_trap(trapnr, signr, str, regs, error_code, NULL); \
return (regs->cs & 3) ? (unsigned long)pr : 0; \
}
DO_ERROR_STACK(12, SIGBUS, "stack segment", stack_segment)
DO_ERROR_STACK( 8, SIGSEGV, "double fault", double_fault)
asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
{
conditional_sti(regs);
......@@ -596,12 +604,18 @@ asmlinkage void default_do_nmi(struct pt_regs * regs)
inb(0x71); /* dummy */
}
asmlinkage void do_debug(struct pt_regs * regs, long error_code)
/* runs on IST stack. */
asmlinkage unsigned long do_debug(struct pt_regs * regs, unsigned long error_code)
{
struct pt_regs *processregs;
unsigned long condition;
struct task_struct *tsk = current;
siginfo_t info;
processregs = (struct pt_regs *)(current->thread.rsp0)-1;
if (regs->cs & 3)
memcpy(processregs, regs, sizeof(struct pt_regs));
#ifdef CONFIG_CHECKING
{
/* RED-PEN interaction with debugger - could destroy gs */
......@@ -658,17 +672,21 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code)
force_sig_info(SIGTRAP, &info, tsk);
clear_dr7:
asm volatile("movq %0,%%db7"::"r"(0UL));
notify_die(DIE_DEBUG, "debug", regs, error_code, 1, SIGTRAP);
return;
notify_die(DIE_DEBUG, "debug", regs, condition, 1, SIGTRAP);
out:
return (regs->cs & 3) ? (unsigned long)processregs : 0;
clear_TF_reenable:
printk("clear_tf_reenable\n");
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
clear_TF:
/* RED-PEN could cause spurious errors */
if (notify_die(DIE_DEBUG, "debug2", regs, error_code, 1, SIGTRAP) != NOTIFY_BAD)
if (notify_die(DIE_DEBUG, "debug2", regs, condition, 1, SIGTRAP)
!= NOTIFY_BAD)
regs->eflags &= ~TF_MASK;
return;
goto out;
}
/*
......@@ -730,7 +748,7 @@ void math_error(void *rip)
force_sig_info(SIGFPE, &info, task);
}
asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code)
asmlinkage void do_coprocessor_error(struct pt_regs * regs)
{
conditional_sti(regs);
math_error((void *)regs->rip);
......@@ -789,8 +807,7 @@ static inline void simd_math_error(void *rip)
force_sig_info(SIGFPE, &info, task);
}
asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs,
long error_code)
asmlinkage void do_simd_coprocessor_error(struct pt_regs * regs)
{
conditional_sti(regs);
simd_math_error((void *)regs->rip);
......@@ -818,11 +835,6 @@ asmlinkage void math_state_restore(void)
me->thread_info->status |= TS_USEDFPU;
}
asmlinkage void math_emulate(void)
{
BUG();
}
void do_call_debug(struct pt_regs *regs)
{
notify_die(DIE_CALL, "debug call", regs, 0, 255, SIGINT);
......@@ -831,7 +843,7 @@ void do_call_debug(struct pt_regs *regs)
void __init trap_init(void)
{
set_intr_gate(0,&divide_error);
set_intr_gate(1,&debug);
set_intr_gate_ist(1,&debug,DEBUG_STACK);
set_intr_gate_ist(2,&nmi,NMI_STACK);
set_system_gate(3,&int3); /* int3-5 can be called from all */
set_system_gate(4,&overflow);
......@@ -848,7 +860,7 @@ void __init trap_init(void)
set_intr_gate(15,&spurious_interrupt_bug);
set_intr_gate(16,&coprocessor_error);
set_intr_gate(17,&alignment_check);
set_intr_gate(18,&machine_check);
set_intr_gate_ist(18,&machine_check, MCE_STACK);
set_intr_gate(19,&simd_coprocessor_error);
#ifdef CONFIG_IA32_EMULATION
......
......@@ -31,9 +31,6 @@
* broken programs will segfault and there's no security risk until we choose to
* fix it.
*
* Add HPET support (port from 2.4). Still needed?
* Nop out vsyscall syscall to avoid anchor for buffer overflows when sysctl off.
*
* These are not urgent things that we need to address only before shipping the first
* production binary kernels.
*/
......@@ -89,7 +86,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv)
if (t < __vxtime.last_tsc) t = __vxtime.last_tsc;
usec += ((t - __vxtime.last_tsc) *
__vxtime.tsc_quot) >> 32;
/* See comment in x86_64 do_gettimeopfday. */
/* See comment in x86_64 do_gettimeofday. */
} else {
usec += ((readl(fix_to_virt(VSYSCALL_HPET) + 0xf0) -
__vxtime.last) * __vxtime.quot) >> 32;
......@@ -106,6 +103,7 @@ static force_inline void do_get_tz(struct timezone * tz)
*tz = __sys_tz;
}
static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
{
int ret;
......@@ -115,6 +113,15 @@ static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
return ret;
}
static force_inline long time_syscall(long *t)
{
long secs;
asm volatile("syscall"
: "=a" (secs)
: "0" (__NR_time),"D" (t) : __syscall_clobber);
return secs;
}
static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
{
if (unlikely(!__sysctl_vsyscall))
......@@ -126,16 +133,15 @@ static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz
return 0;
}
static time_t __vsyscall(1) vtime(time_t * t)
/* This will break when the xtime seconds get inaccurate, but that is
* unlikely */
static time_t __vsyscall(1) vtime(time_t *t)
{
struct timeval tv;
if (unlikely(!__sysctl_vsyscall))
gettimeofday(&tv, NULL);
else
do_vgettimeofday(&tv);
if (t)
*t = tv.tv_sec;
return tv.tv_sec;
return time_syscall(t);
else if (t)
*t = __xtime.tv_sec;
return __xtime.tv_sec;
}
static long __vsyscall(2) venosys_0(void)
......
......@@ -225,3 +225,6 @@ EXPORT_SYMBOL_GPL(flush_tlb_all);
#endif
EXPORT_SYMBOL(sys_ioctl);
EXPORT_SYMBOL(memcpy_toio);
EXPORT_SYMBOL(memcpy_fromio);
......@@ -220,10 +220,14 @@ csum_partial_copy_generic:
/* Exception handlers. Very simple, zeroing is done in the wrappers */
.Lbad_source:
movq (%rsp),%rax
testq %rax,%rax
jz .Lende
movl $-EFAULT,(%rax)
jmp .Lende
.Lbad_dest:
movq 8(%rsp),%rax
testq %rax,%rax
jz .Lende
movl $-EFAULT,(%rax)
jmp .Lende
......@@ -11,7 +11,3 @@ void *memcpy_fromio(void *dst,const void*src,unsigned len)
{
return __inline_memcpy(dst,__io_virt(src),len);
}
EXPORT_SYMBOL(memcpy_toio);
EXPORT_SYMBOL(memcpy_fromio);
......@@ -280,15 +280,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (unlikely(in_atomic() || !mm))
goto bad_area_nosemaphore;
/* Work around K8 erratum #100
K8 in compat mode occasionally jumps to illegal addresses >4GB.
We catch this here in the page fault handler because these
addresses are not reachable. Just detect this case and return.
Any code segment in LDT is compatibility mode. */
if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
(address >> 32))
return;
again:
down_read(&mm->mmap_sem);
......@@ -373,6 +364,16 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (is_prefetch(regs, address))
return;
/* Work around K8 erratum #100 K8 in compat mode
occasionally jumps to illegal addresses >4GB. We
catch this here in the page fault handler because
these addresses are not reachable. Just detect this
case and return. Any code segment in LDT is
compatibility mode. */
if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
(address >> 32))
return;
if (exception_trace && !unhandled_signal(tsk, SIGSEGV)) {
printk(KERN_INFO
"%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n",
......
......@@ -37,7 +37,9 @@
#include <asm/proto.h>
#include <asm/smp.h>
#ifndef Dprintk
#define Dprintk(x...)
#endif
extern char _stext[];
......@@ -577,3 +579,32 @@ static __init int x8664_sysctl_init(void)
}
__initcall(x8664_sysctl_init);
#endif
/* Pseudo VMAs to allow ptrace access for the vsyscall pages. x86-64 has two
different ones: one for 32bit and one for 64bit. Use the appropriate one
for the target task. */
static struct vm_area_struct gate_vma = {
.vm_start = VSYSCALL_START,
.vm_end = VSYSCALL_END,
.vm_page_prot = PAGE_READONLY
};
static struct vm_area_struct gate32_vma = {
.vm_start = VSYSCALL32_BASE,
.vm_end = VSYSCALL32_END,
.vm_page_prot = PAGE_READONLY
};
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
return test_tsk_thread_flag(tsk, TIF_IA32) ? &gate32_vma : &gate_vma;
}
int in_gate_area(struct task_struct *task, unsigned long addr)
{
struct vm_area_struct *vma = &gate_vma;
if (test_tsk_thread_flag(task, TIF_IA32))
vma = &gate32_vma;
return (addr >= vma->vm_start) && (addr < vma->vm_end);
}
......@@ -14,7 +14,9 @@
#include <asm/dma.h>
#include <asm/numa.h>
#ifndef Dprintk
#define Dprintk(x...)
#endif
struct pglist_data *node_data[MAXNODE];
bootmem_data_t plat_node_bdata[MAX_NUMNODES];
......
#
# Makefile for X86_64 specific PCI routines
#
# Reuse the i386 PCI subsystem using symlinks
# Reuse the i386 PCI subsystem
#
CFLAGS += -I arch/i386/pci
obj-y := i386.o
obj-$(CONFIG_PCI_DIRECT)+= direct.o
obj-y += fixup.o
obj-$(CONFIG_ACPI_PCI) += acpi.o
obj-y += legacy.o irq.o common.o
# mmconfig has a 64bit special version
obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
$(obj)/direct.c: $(obj)/pci.h
@ln -sf ../../i386/pci/direct.c $(obj)/direct.c
$(obj)/legacy.c: $(obj)/pci.h
@ln -sf ../../i386/pci/legacy.c $(obj)/legacy.c
$(obj)/common.c: $(obj)/pci.h
@ln -sf ../../i386/pci/common.c $(obj)/common.c
$(obj)/acpi.c: $(obj)/pci.h
@ln -sf ../../i386/pci/acpi.c $(obj)/acpi.c
$(obj)/pci.h:
@ln -sf ../../i386/pci/pci.h $(obj)/pci.h
$(obj)/irq.c: $(obj)/pci.h
@ln -sf ../../i386/pci/irq.c $(obj)/irq.c
$(obj)/fixup.c: $(obj)/pci.h
@ln -sf ../../i386/pci/fixup.c $(obj)/fixup.c
$(obj)/i386.c: $(obj)/pci.h
@ln -sf ../../i386/pci/i386.c $(obj)/i386.c
clean-files += i386.c legacy.c fixup.c acpi.c irq.c pci.h common.c direct.c
direct-y += ../../i386/pci/direct.o
acpi-y += ../../i386/pci/acpi.o
legacy-y += ../../i386/pci/legacy.o
irq-y += ../../i386/pci/irq.o
common-y += ../../i386/pci/common.o
fixup-y += ../../i386/pci/fixup.o
i386-y += ../../i386/pci/i386.o
/*
* mmconfig.c - Low-level direct PCI config space access via MMCONFIG
*
* This is a 64bit optimized version that always keeps the full mmconfig
* space mapped. This allows lockless config space operation.
*/
#include <linux/pci.h>
#include <linux/init.h>
#include "pci.h"
#define MMCONFIG_APER_SIZE (256*1024*1024)
/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
u32 pci_mmcfg_base_addr;
/* Static virtual mapping of the MMCONFIG aperture */
char *pci_mmcfg_virt;
static inline char *pci_dev_base(int bus, int devfn)
{
return pci_mmcfg_virt + ((bus << 20) | (devfn << 12));
}
static int pci_mmcfg_read(int seg, int bus, int devfn, int reg, int len, u32 *value)
{
char *addr = pci_dev_base(bus, devfn);
if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095)))
return -EINVAL;
switch (len) {
case 1:
*value = readb(addr + reg);
break;
case 2:
*value = readw(addr + reg);
break;
case 4:
*value = readl(addr + reg);
break;
}
return 0;
}
static int pci_mmcfg_write(int seg, int bus, int devfn, int reg, int len, u32 value)
{
char *addr = pci_dev_base(bus,devfn);
if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095)))
return -EINVAL;
switch (len) {
case 1:
writeb(value, addr + reg);
break;
case 2:
writew(value, addr + reg);
break;
case 4:
writel(value, addr + reg);
break;
}
/* Dummy read to flush PCI write */
readl(addr);
return 0;
}
static struct pci_raw_ops pci_mmcfg = {
.read = pci_mmcfg_read,
.write = pci_mmcfg_write,
};
static int __init pci_mmcfg_init(void)
{
if ((pci_probe & PCI_PROBE_MMCONF) == 0)
return 0;
if (!pci_mmcfg_base_addr)
return 0;
/* RED-PEN i386 doesn't do _nocache right now */
pci_mmcfg_virt = ioremap_nocache(pci_mmcfg_base_addr, MMCONFIG_APER_SIZE);
if (!pci_mmcfg_virt) {
printk("PCI: Cannot map mmconfig aperture\n");
return 0;
}
printk(KERN_INFO "PCI: Using MMCONFIG at %x\n", pci_mmcfg_base_addr);
raw_pci_ops = &pci_mmcfg;
pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
return 0;
}
arch_initcall(pci_mmcfg_init);
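
The pci_dev_base() arithmetic above is the heart of mmconfig: each bus
owns 1MB of the aperture (256 devfn slots x 4KB of extended config
space per function).  A stand-alone sketch of the same offset
computation (illustrative, outside the kernel):

#include <stdio.h>

static unsigned long mmcfg_offset(unsigned bus, unsigned dev, unsigned fn,
                                  unsigned reg)
{
    unsigned devfn = (dev << 3) | fn;   /* PCI devfn: device:5 bits, function:3 */
    return ((unsigned long)bus << 20) | (devfn << 12) | reg;
}

int main(void)
{
    /* e.g. bus 0, device 0x18 function 0 (a K8 northbridge), register 0x60 */
    printf("offset = %#lx\n", mmcfg_offset(0, 0x18, 0, 0x60));
    return 0;
}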
......@@ -31,7 +31,7 @@
#define ARGOFFSET R11
#define SWFRAME ORIG_RAX
.macro SAVE_ARGS addskip=0,norcx=0
.macro SAVE_ARGS addskip=0,norcx=0,nor891011=0
subq $9*8+\addskip,%rsp
CFI_ADJUST_CFA_OFFSET 9*8+\addskip
movq %rdi,8*8(%rsp)
......@@ -47,6 +47,8 @@
.endif
movq %rax,4*8(%rsp)
CFI_OFFSET rax,4*8-(9*8+\addskip)
.if \nor891011
.else
movq %r8,3*8(%rsp)
CFI_OFFSET r8,3*8-(9*8+\addskip)
movq %r9,2*8(%rsp)
......@@ -55,17 +57,21 @@
CFI_OFFSET r10,1*8-(9*8+\addskip)
movq %r11,(%rsp)
CFI_OFFSET r11,-(9*8+\addskip)
.endif
.endm
#define ARG_SKIP 9*8
.macro RESTORE_ARGS skiprax=0,addskip=0,skiprcx=0,skipr11=0
.macro RESTORE_ARGS skiprax=0,addskip=0,skiprcx=0,skipr11=0,skipr8910=0,skiprdx=0
.if \skipr11
.else
movq (%rsp),%r11
.endif
.if \skipr8910
.else
movq 1*8(%rsp),%r10
movq 2*8(%rsp),%r9
movq 3*8(%rsp),%r8
.endif
.if \skiprax
.else
movq 4*8(%rsp),%rax
......@@ -74,7 +80,10 @@
.else
movq 5*8(%rsp),%rcx
.endif
.if \skiprdx
.else
movq 6*8(%rsp),%rdx
.endif
movq 7*8(%rsp),%rsi
movq 8*8(%rsp),%rdi
.if ARG_SKIP+\addskip > 0
......
......@@ -156,6 +156,10 @@ extern inline unsigned int cpuid_edx(unsigned int op)
#define MSR_MTRRcap 0x0fe
#define MSR_IA32_BBL_CR_CTL 0x119
#define MSR_IA32_SYSENTER_CS 0x174
#define MSR_IA32_SYSENTER_ESP 0x175
#define MSR_IA32_SYSENTER_EIP 0x176
#define MSR_IA32_MCG_CAP 0x179
#define MSR_IA32_MCG_STATUS 0x17a
#define MSR_IA32_MCG_CTL 0x17b
......
......@@ -137,6 +137,13 @@ extern __inline__ int get_order(unsigned long size)
#define VM_STACK_DEFAULT_FLAGS \
(test_thread_flag(TIF_IA32) ? vm_stack_flags32 : vm_stack_flags)
#define CONFIG_ARCH_GATE_AREA 1
#ifndef __ASSEMBLY__
struct task_struct;
struct vm_area_struct *get_gate_vma(struct task_struct *tsk);
int in_gate_area(struct task_struct *task, unsigned long addr);
#endif
#endif /* __KERNEL__ */
......
......@@ -173,7 +173,7 @@ static inline void clear_in_cr4 (unsigned long mask)
* space during mmap's.
*/
#define IA32_PAGE_OFFSET ((current->personality & ADDR_LIMIT_3GB) ? 0xc0000000 : 0xFFFFe000)
#define TASK_UNMAPPED_32 (PAGE_ALIGN(0xc5000000))
#define TASK_UNMAPPED_32 PAGE_ALIGN(IA32_PAGE_OFFSET/3)
#define TASK_UNMAPPED_64 PAGE_ALIGN(TASK_SIZE/3)
#define TASK_UNMAPPED_BASE \
(test_thread_flag(TIF_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64)
......@@ -262,7 +262,9 @@ struct thread_struct {
#define STACKFAULT_STACK 1
#define DOUBLEFAULT_STACK 2
#define NMI_STACK 3
#define N_EXCEPTION_STACKS 3 /* hw limit: 7 */
#define DEBUG_STACK 4
#define MCE_STACK 5
#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */
#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
#define EXCEPTION_STACK_ORDER 0
......@@ -451,4 +453,6 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
ti->task; \
})
#define cache_line_size() (boot_cpu_data.x86_clflush_size)
#endif /* __ASM_X86_64_PROCESSOR_H */
......@@ -21,6 +21,7 @@ extern void syscall_init(void);
extern void ia32_syscall(void);
extern void ia32_cstar_target(void);
extern void ia32_sysenter_target(void);
extern void calibrate_delay(void);
extern void cpu_idle(void);
......@@ -37,6 +38,8 @@ extern int numa_setup(char *opt);
extern int setup_early_printk(char *);
extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2)));
extern void early_identify_cpu(struct cpuinfo_x86 *c);
extern int k8_scan_nodes(unsigned long start, unsigned long end);
extern int numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
......@@ -68,6 +71,7 @@ extern void show_regs(struct pt_regs * regs);
extern int map_syscall32(struct mm_struct *mm, unsigned long address);
extern char *syscall32_page;
extern void syscall32_cpu_init(void);
extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end);
......
......@@ -47,7 +47,7 @@ extern void smp_invalidate_rcv(void); /* Process an NMI */
extern void (*mtrr_hook) (void);
extern void zap_low_mappings(void);
void smp_stop_cpu(void);
extern int cpu_sibling_map[];
extern char cpu_sibling_map[];
#define SMP_TRAMPOLINE_BASE 0x6000
......@@ -74,7 +74,15 @@ extern __inline int hard_smp_processor_id(void)
return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
}
#define safe_smp_processor_id() (disable_apic ? 0 : hard_smp_processor_id())
/*
* Some lowlevel functions might want to know about
* the real APIC ID <-> CPU # mapping.
* AK: why is this volatile?
*/
extern volatile char x86_apicid_to_cpu[NR_CPUS];
extern volatile char x86_cpu_to_apicid[NR_CPUS];
#define safe_smp_processor_id() (disable_apic ? 0 : x86_apicid_to_cpu[hard_smp_processor_id()])
#define cpu_online(cpu) cpu_isset(cpu, cpu_online_map)
#endif /* !ASSEMBLY */
......
......@@ -82,7 +82,6 @@ static inline struct thread_info *stack_thread_info(void)
#else /* !__ASSEMBLY__ */
/* how to get the thread information struct from ASM */
/* only works on the process stack. otherwise get it via the PDA. */
#define GET_THREAD_INFO(reg) \
movq %gs:pda_kernelstack,reg ; \
subq $(THREAD_SIZE-PDA_STACKOFFSET),reg
......@@ -118,8 +117,10 @@ static inline struct thread_info *stack_thread_info(void)
#define _TIF_FORK (1<<TIF_FORK)
#define _TIF_ABI_PENDING (1<<TIF_ABI_PENDING)
#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
#define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK (0x0000FFFF & ~(_TIF_SYSCALL_TRACE|_TIF_SINGLESTEP))
/* work to do on any return to user space */
#define _TIF_ALLWORK_MASK 0x0000FFFF
#define PREEMPT_ACTIVE 0x4000000
......
......@@ -3,11 +3,18 @@
/* Values need to match arch/x86_64/ia32/vsyscall.lds */
#ifdef __ASSEMBLY__
#define VSYSCALL32_BASE 0xffffe000
#define VSYSCALL32_SYSEXIT (VSYSCALL32_BASE + 0x410)
#else
#define VSYSCALL32_BASE 0xffffe000UL
#define VSYSCALL32_END (VSYSCALL32_BASE + PAGE_SIZE)
#define VSYSCALL32_EHDR ((const struct elf32_hdr *) VSYSCALL32_BASE)
#define VSYSCALL32_VSYSCALL ((void *)VSYSCALL32_BASE + 0x400)
#define VSYSCALL32_SYSEXIT ((void *)VSYSCALL32_BASE + 0x410)
#define VSYSCALL32_SIGRETURN ((void *)VSYSCALL32_BASE + 0x500)
#define VSYSCALL32_RTSIGRETURN ((void *)VSYSCALL32_BASE + 0x600)
#endif
#endif