Commit 1e1144fd authored by Andi Kleen, committed by Linus Torvalds

[PATCH] x86-64 update

 - Optimize __copy*user a bit.
 - Merge with 2.5.53/i386
 - Fix broken 32bit signal handling.
 - Implement AT_SYSINFO and a vsyscall page for 32bit programs.
 - Fix 32bit SYSCALL entry point to handle 6 arguments and restart correctly.
 - Add oprofile support (Vojtech Pavlik, with changes by me)
   This is shared code with i386.
parent f4d6e308
...@@ -672,6 +672,7 @@ source "drivers/usb/Kconfig" ...@@ -672,6 +672,7 @@ source "drivers/usb/Kconfig"
source "net/bluetooth/Kconfig" source "net/bluetooth/Kconfig"
source "arch/x86_64/oprofile/Kconfig"
menu "Kernel hacking" menu "Kernel hacking"
...@@ -728,7 +729,6 @@ config INIT_DEBUG ...@@ -728,7 +729,6 @@ config INIT_DEBUG
config KALLSYMS config KALLSYMS
bool "Load all symbols for debugging/kksymoops" bool "Load all symbols for debugging/kksymoops"
depends on DEBUG_KERNEL
help help
Say Y here to let the kernel print out symbolic crash information and Say Y here to let the kernel print out symbolic crash information and
symbolic stack backtraces. This increases the size of the kernel symbolic stack backtraces. This increases the size of the kernel
......
...@@ -54,6 +54,8 @@ libs-y += arch/x86_64/lib/ ...@@ -54,6 +54,8 @@ libs-y += arch/x86_64/lib/
core-y += arch/x86_64/kernel/ arch/x86_64/mm/ core-y += arch/x86_64/kernel/ arch/x86_64/mm/
core-$(CONFIG_IA32_EMULATION) += arch/x86_64/ia32/ core-$(CONFIG_IA32_EMULATION) += arch/x86_64/ia32/
drivers-$(CONFIG_PCI) += arch/x86_64/pci/ drivers-$(CONFIG_PCI) += arch/x86_64/pci/
# FIXME: is drivers- right ?
drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/
makeboot =$(Q)$(MAKE) -f scripts/Makefile.build obj=arch/x86_64/boot $(1) makeboot =$(Q)$(MAKE) -f scripts/Makefile.build obj=arch/x86_64/boot $(1)
......
...@@ -47,6 +47,7 @@ cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/bootsect $(obj)/setup \ ...@@ -47,6 +47,7 @@ cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/bootsect $(obj)/setup \
$(obj)/zImage $(obj)/bzImage: $(obj)/bootsect $(obj)/setup \ $(obj)/zImage $(obj)/bzImage: $(obj)/bootsect $(obj)/setup \
$(obj)/vmlinux.bin $(obj)/tools/build FORCE $(obj)/vmlinux.bin $(obj)/tools/build FORCE
$(call if_changed,image) $(call if_changed,image)
@echo 'Kernel: $@ is ready'
$(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
$(call if_changed,objcopy) $(call if_changed,objcopy)
......
...@@ -470,6 +470,7 @@ CONFIG_RTC=y ...@@ -470,6 +470,7 @@ CONFIG_RTC=y
# CONFIG_FTAPE is not set # CONFIG_FTAPE is not set
# CONFIG_AGP is not set # CONFIG_AGP is not set
# CONFIG_AGP_GART is not set # CONFIG_AGP_GART is not set
# CONFIG_AGP3 is not set
# CONFIG_DRM is not set # CONFIG_DRM is not set
# CONFIG_MWAVE is not set # CONFIG_MWAVE is not set
CONFIG_RAW_DRIVER=y CONFIG_RAW_DRIVER=y
...@@ -585,6 +586,11 @@ CONFIG_DUMMY_CONSOLE=y ...@@ -585,6 +586,11 @@ CONFIG_DUMMY_CONSOLE=y
# #
# CONFIG_BT is not set # CONFIG_BT is not set
#
# Profiling support
#
# CONFIG_PROFILING is not set
# #
# Kernel hacking # Kernel hacking
# #
......
...@@ -6,4 +6,4 @@ export-objs := ia32_ioctl.o sys_ia32.o ...@@ -6,4 +6,4 @@ export-objs := ia32_ioctl.o sys_ia32.o
obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_ioctl.o \ obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_ioctl.o \
ia32_signal.o \ ia32_signal.o \
ia32_binfmt.o fpu32.o socket32.o ptrace32.o ipc32.o ia32_binfmt.o fpu32.o socket32.o ptrace32.o ipc32.o syscall32.o
...@@ -70,10 +70,6 @@ static inline unsigned long twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) ...@@ -70,10 +70,6 @@ static inline unsigned long twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
return ret; return ret;
} }
struct s10 {
u64 a;
u16 b;
} __attribute__((packed));
static inline int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave, static inline int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave,
struct _fpstate_ia32 *buf) struct _fpstate_ia32 *buf)
...@@ -98,9 +94,7 @@ static inline int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave, ...@@ -98,9 +94,7 @@ static inline int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave,
to = (struct _fpxreg *)&fxsave->st_space[0]; to = (struct _fpxreg *)&fxsave->st_space[0];
from = &buf->_st[0]; from = &buf->_st[0];
for (i = 0 ; i < 8 ; i++, to++, from++) { for (i = 0 ; i < 8 ; i++, to++, from++) {
struct s10 *top = (void *)to, *fromp = (void *)from; if (__copy_from_user(to, from, sizeof(*from)))
if (__put_user(fromp->a, &top->a) ||
__put_user(fromp->b, &top->b))
return -1; return -1;
} }
return 0; return 0;
...@@ -136,9 +130,7 @@ static inline int convert_fxsr_to_user(struct _fpstate_ia32 *buf, ...@@ -136,9 +130,7 @@ static inline int convert_fxsr_to_user(struct _fpstate_ia32 *buf,
to = &buf->_st[0]; to = &buf->_st[0];
from = (struct _fpxreg *) &fxsave->st_space[0]; from = (struct _fpxreg *) &fxsave->st_space[0];
for ( i = 0 ; i < 8 ; i++, to++, from++ ) { for ( i = 0 ; i < 8 ; i++, to++, from++ ) {
struct s10 *top = (void *)top, *fromp = (void *)from; if (__copy_to_user(to, from, sizeof(*to)))
if (__get_user(fromp->a, &top->a) ||
__get_user(fromp->b, &top->b))
return -1; return -1;
} }
return 0; return 0;
......
...@@ -23,6 +23,12 @@ ...@@ -23,6 +23,12 @@
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/ia32.h> #include <asm/ia32.h>
#define ELF_NAME "elf/i386"
#define AT_SYSINFO 32
#define ARCH_DLINFO NEW_AUX_ENT(AT_SYSINFO, 0xffffe000)
struct file; struct file;
struct elf_phdr; struct elf_phdr;
......
...@@ -2,8 +2,6 @@ ...@@ -2,8 +2,6 @@
* Compatibility mode system call entry point for x86-64. * Compatibility mode system call entry point for x86-64.
* *
* Copyright 2000-2002 Andi Kleen, SuSE Labs. * Copyright 2000-2002 Andi Kleen, SuSE Labs.
*
* $Id: ia32entry.S,v 1.31 2002/03/24 13:01:45 ak Exp $
*/ */
#include <asm/calling.h> #include <asm/calling.h>
...@@ -15,19 +13,21 @@ ...@@ -15,19 +13,21 @@
#include <asm/segment.h> #include <asm/segment.h>
#include <linux/linkage.h> #include <linux/linkage.h>
.macro IA32_ARG_FIXUP .macro IA32_ARG_FIXUP noebp=0
movl %edi,%r8d movl %edi,%r8d
.if \noebp
.else
movl %ebp,%r9d movl %ebp,%r9d
.endif
xchg %ecx,%esi xchg %ecx,%esi
movl %ebx,%edi movl %ebx,%edi
movl %edx,%edx /* zero extension */ movl %edx,%edx /* zero extension */
.endm .endm
/* /*
* 32bit SYSCALL instruction entry. This is called from the 32bit vsyscall page. * 32bit SYSCALL instruction entry.
*
* Register setup:
* *
* Arguments:
* %eax System call number. * %eax System call number.
* %ebx Arg1 * %ebx Arg1
* %ecx return EIP * %ecx return EIP
...@@ -53,40 +53,51 @@ ENTRY(ia32_cstar_target) ...@@ -53,40 +53,51 @@ ENTRY(ia32_cstar_target)
movl %eax,%eax /* zero extension */ movl %eax,%eax /* zero extension */
movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
movq %rcx,RIP-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp)
movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
movl %ebp,%ecx movl %ebp,%ecx
movq $__USER32_CS,CS-ARGOFFSET(%rsp) movq $__USER32_CS,CS-ARGOFFSET(%rsp)
movq $__USER32_DS,SS-ARGOFFSET(%rsp) movq $__USER32_DS,SS-ARGOFFSET(%rsp)
movq %r11,EFLAGS-ARGOFFSET(%rsp) movq %r11,EFLAGS-ARGOFFSET(%rsp)
movq %r8,RSP-ARGOFFSET(%rsp) movq %r8,RSP-ARGOFFSET(%rsp)
/* no need to do an access_ok check here because the 32bit /* no need to do an access_ok check here because r8 has been
user space cannot set r8 to a value > 4GB and the kernel has no 32bit zero extended */
memory mapping in the first 4GB. */
/* hardware stack frame is complete now */ /* hardware stack frame is complete now */
1: movl (%r8),%ebp 1: movl (%r8),%r9d
.section __ex_table,"a" .section __ex_table,"a"
.quad 1b,cstar_badarg .quad 1b,cstar_badarg
.previous .previous
movq %r9,R9-ARGOFFSET(%rsp)
GET_THREAD_INFO(%r10) GET_THREAD_INFO(%r10)
bt $TIF_SYSCALL_TRACE,threadinfo_flags(%r10) bt $TIF_SYSCALL_TRACE,threadinfo_flags(%r10)
jc ia32_tracesys jc ia32_tracesys
cstar_do_call:
cmpl $IA32_NR_syscalls,%eax cmpl $IA32_NR_syscalls,%eax
jae ia32_badsys jae ia32_badsys
IA32_ARG_FIXUP IA32_ARG_FIXUP 1
call *ia32_sys_call_table(,%rax,8) call *ia32_sys_call_table(,%rax,8)
.globl ia32_sysret .globl cstar_sysret
cstar_sysret: cstar_sysret:
movq %rax,RAX-ARGOFFSET(%rsp) movq %rax,RAX-ARGOFFSET(%rsp)
GET_THREAD_INFO(%r10) GET_THREAD_INFO(%r10)
cli cli
testl $_TIF_WORK_MASK,threadinfo_flags(%r10) testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
jnz int_ret_from_sys_call jnz int_ret_from_sys_call
RESTORE_ARGS 1,0,1,1 /* could avoid the stack restore here */ RESTORE_ARGS 1,-ARG_SKIP,1,1
movl RIP-SWFRAME(%rsp),%ecx movl RIP-ARGOFFSET(%rsp),%ecx
movl RSP-SWFRAME(%rsp),%esp movl EFLAGS-ARGOFFSET(%rsp),%r11d
movl EFLAGS-SWFRAME(%rsp),%r11d movl RSP-ARGOFFSET(%rsp),%esp
swapgs swapgs
sysretl sysretl
/*
 * Syscall-trace path intended for the 32bit SYSCALL entry: hand the
 * register frame to syscall_trace, reload the argument registers from
 * the stack, then continue at cstar_do_call for the normal dispatch.
 * SAVE_REST/RESTORE_REST presumably spill and reload the registers
 * that the entry code did not save -- confirm in <asm/calling.h>.
 * NOTE(review): the TIF_SYSCALL_TRACE check visible in ia32_cstar_target
 * branches to ia32_tracesys rather than to this label; confirm that
 * cstar_tracesys is actually reachable.
 */
cstar_tracesys:
SAVE_REST
movq $-ENOSYS,RAX(%rsp) /* really needed? */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace
LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
jmp cstar_do_call
cstar_badarg: cstar_badarg:
movq $-EFAULT,%rax movq $-EFAULT,%rax
jmp cstar_sysret jmp cstar_sysret
...@@ -115,8 +126,11 @@ cstar_badarg: ...@@ -115,8 +126,11 @@ cstar_badarg:
ENTRY(ia32_syscall) ENTRY(ia32_syscall)
swapgs swapgs
sti sti
movl %eax,%eax
pushq %rax pushq %rax
cld cld
/* note the registers are not zero extended to the sf.
this could be a problem */
SAVE_ARGS SAVE_ARGS
GET_THREAD_INFO(%r10) GET_THREAD_INFO(%r10)
bt $TIF_SYSCALL_TRACE,threadinfo_flags(%r10) bt $TIF_SYSCALL_TRACE,threadinfo_flags(%r10)
......
...@@ -86,6 +86,11 @@ ...@@ -86,6 +86,11 @@
extern int overflowuid,overflowgid; extern int overflowuid,overflowgid;
extern asmlinkage long sys_newstat(char * filename, struct stat * statbuf);
extern asmlinkage long sys_newlstat(char * filename, struct stat * statbuf);
extern asmlinkage long sys_newfstat(unsigned int fd, struct stat * statbuf);
int cp_compat_stat(struct kstat *kbuf, struct compat_stat *ubuf) int cp_compat_stat(struct kstat *kbuf, struct compat_stat *ubuf)
{ {
if (verify_area(VERIFY_WRITE, ubuf, sizeof(struct compat_stat)) || if (verify_area(VERIFY_WRITE, ubuf, sizeof(struct compat_stat)) ||
......
/* Copyright 2002 Andi Kleen, SuSE Labs */
/* vsyscall handling for 32bit processes. Map a stub page into it
on demand because 32bit cannot reach the kernel's fixmaps */
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/init.h>
#include <asm/proto.h>
#include <asm/tlbflush.h>
/*
 * 32bit SYSCALL stub mapped into user space.
 *
 * The bytes between the syscall32 and syscall32_end labels are copied
 * into syscall32_page by init_syscall32() and that page is mapped into
 * a process by map_syscall32().
 *
 * The stub pushes %ebp on the user stack, copies %ecx into %ebp, issues
 * SYSCALL, then pops %ebp and returns to the caller.
 * NOTE(review): presumably %ecx is moved because SYSCALL replaces it
 * with the return EIP, and the pushed %ebp is what the kernel entry
 * reads back from the user stack -- confirm against ia32_cstar_target.
 */
asm(" .code32\n"
"\nsyscall32:\n"
" pushl %ebp\n"
" movl %ecx,%ebp\n"
" syscall\n"
" popl %ebp\n"
" ret\n"
"syscall32_end:\n"
" .code64\n");
/* Start and end of the stub emitted by the asm() block above. */
extern unsigned char syscall32[], syscall32_end[];
/* Kernel virtual address of the page holding a copy of the stub. */
static unsigned long syscall32_page;
/* RED-PEN: This knows too much about high level VM */
/* Alternative would be to generate a vma with appropriate backing options
and let it be handled by generic VM */
/*
 * Install the 32bit syscall stub page at @address in @mm.
 *
 * Takes mm->mmap_sem for reading and mm->page_table_lock, allocates the
 * page-table levels needed to reach @address, and points the pte at
 * syscall32_page if that pte is still empty.
 *
 * Returns 0 on success, or -ENOMEM if a page-table level could not be
 * allocated.
 */
int map_syscall32(struct mm_struct *mm, unsigned long address)
{
	pmd_t *pmd;
	pte_t *pte;
	int err = -ENOMEM;

	down_read(&mm->mmap_sem);
	spin_lock(&mm->page_table_lock);

	pmd = pmd_alloc(mm, pgd_offset(mm, address), address);
	if (!pmd)
		goto unlock;

	pte = pte_alloc_map(mm, pmd, address);
	if (pte == NULL)
		goto unlock;

	/* Only fill the slot when nothing has been mapped there yet. */
	if (pte_none(*pte))
		set_pte(pte, mk_pte(virt_to_page(syscall32_page),
				    PAGE_KERNEL_VSYSCALL));

	/* Flush only the local CPU. Other CPUs taking a fault
	   will just end up here again */
	__flush_tlb_one(address);
	err = 0;

unlock:
	spin_unlock(&mm->page_table_lock);
	up_read(&mm->mmap_sem);
	return err;
}
/*
 * Boot-time setup for the 32bit syscall stub page.
 *
 * Allocates one zeroed page, marks it reserved, and copies the stub
 * code found between syscall32 and syscall32_end into it. Panics if
 * the page cannot be allocated.
 *
 * Returns 0. The function is declared int, so it must return a value
 * explicitly; falling off the end would leave the caller of the
 * initcall with an indeterminate status.
 */
static int __init init_syscall32(void)
{
	syscall32_page = get_zeroed_page(GFP_KERNEL);
	if (!syscall32_page)
		panic("Cannot allocate syscall32 page");
	SetPageReserved(virt_to_page(syscall32_page));
	memcpy((void *)syscall32_page, syscall32, syscall32_end - syscall32);
	return 0;
}
__initcall(init_syscall32);
...@@ -9,7 +9,7 @@ export-objs := x8664_ksyms.o pci-gart.o pci-dma.o ...@@ -9,7 +9,7 @@ export-objs := x8664_ksyms.o pci-gart.o pci-dma.o
obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_x86_64.o \ ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_x86_64.o \
pci-dma.o x8664_ksyms.o i387.o syscall.o vsyscall.o \ pci-dma.o x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bluesmoke.o bootflag.o e820.o reboot.o profile.o setup64.o bluesmoke.o bootflag.o e820.o reboot.o
obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_MTRR) += mtrr/
obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_X86_MSR) += msr.o
...@@ -24,6 +24,8 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o ...@@ -24,6 +24,8 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o
obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_PROFILING) += profile.o
$(obj)/bootflag.c: $(obj)/bootflag.c:
@ln -sf ../../i386/kernel/bootflag.c $(obj)/bootflag.c @ln -sf ../../i386/kernel/bootflag.c $(obj)/bootflag.c
......
...@@ -987,6 +987,7 @@ asmlinkage void smp_spurious_interrupt(void) ...@@ -987,6 +987,7 @@ asmlinkage void smp_spurious_interrupt(void)
printk(KERN_INFO "spurious APIC interrupt on CPU#%d, %ld skipped.\n", printk(KERN_INFO "spurious APIC interrupt on CPU#%d, %ld skipped.\n",
smp_processor_id(), skipped); smp_processor_id(), skipped);
last_warning = jiffies; last_warning = jiffies;
skipped = 0;
} else { } else {
skipped++; skipped++;
} }
......
...@@ -472,6 +472,11 @@ void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_ ...@@ -472,6 +472,11 @@ void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, __u32 thread_info_
thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current)); thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current));
#endif #endif
/* Pending single-step? */
if (thread_info_flags & _TIF_SINGLESTEP) {
regs->eflags |= TF_MASK;
clear_thread_flag(TIF_SINGLESTEP);
}
/* deal with pending signal delivery */ /* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING) if (thread_info_flags & _TIF_SIGPENDING)
......
...@@ -221,6 +221,19 @@ static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) ...@@ -221,6 +221,19 @@ static void timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
do_timer(regs); do_timer(regs);
/*
* In the SMP case we use the local APIC timer interrupt to do the profiling,
* except when we simulate SMP mode on a uniprocessor system, in that case we
* have to call the local interrupt handler.
*/
#ifndef CONFIG_X86_LOCAL_APIC
x86_do_profile(regs);
#else
if (!using_apic_timer)
smp_local_timer_interrupt(regs);
#endif
/* /*
* If we have an externally synchronized Linux clock, then update CMOS clock * If we have an externally synchronized Linux clock, then update CMOS clock
* accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
......
...@@ -635,7 +635,7 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code) ...@@ -635,7 +635,7 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code)
* interface. * interface.
*/ */
if ((regs->cs & 3) == 0) if ((regs->cs & 3) == 0)
goto clear_TF; goto clear_TF_reenable;
if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE)
goto clear_TF; goto clear_TF;
} }
...@@ -653,6 +653,9 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code) ...@@ -653,6 +653,9 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code)
asm("movq %0,%%db7"::"r"(0UL)); asm("movq %0,%%db7"::"r"(0UL));
return; return;
clear_TF_reenable:
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
clear_TF: clear_TF:
regs->eflags &= ~TF_MASK; regs->eflags &= ~TF_MASK;
return; return;
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <asm/desc.h> #include <asm/desc.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/pgalloc.h> #include <asm/pgalloc.h>
#include <asm/nmi.h>
#include <asm/kdebug.h> #include <asm/kdebug.h>
#include <asm/unistd.h> #include <asm/unistd.h>
...@@ -116,7 +117,11 @@ EXPORT_SYMBOL_NOVERS(__read_lock_failed); ...@@ -116,7 +117,11 @@ EXPORT_SYMBOL_NOVERS(__read_lock_failed);
EXPORT_SYMBOL(synchronize_irq); EXPORT_SYMBOL(synchronize_irq);
EXPORT_SYMBOL(smp_call_function); EXPORT_SYMBOL(smp_call_function);
#endif
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PM)
EXPORT_SYMBOL_GPL(set_nmi_pm_callback);
EXPORT_SYMBOL_GPL(unset_nmi_pm_callback);
#endif #endif
#ifdef CONFIG_VT #ifdef CONFIG_VT
...@@ -127,6 +132,11 @@ EXPORT_SYMBOL(get_wchan); ...@@ -127,6 +132,11 @@ EXPORT_SYMBOL(get_wchan);
EXPORT_SYMBOL(rtc_lock); EXPORT_SYMBOL(rtc_lock);
EXPORT_SYMBOL_GPL(register_profile_notifier);
EXPORT_SYMBOL_GPL(unregister_profile_notifier);
EXPORT_SYMBOL_GPL(set_nmi_callback);
EXPORT_SYMBOL_GPL(unset_nmi_callback);
/* Export string functions. We normally rely on gcc builtin for most of these, /* Export string functions. We normally rely on gcc builtin for most of these,
but gcc sometimes decides not to inline them. */ but gcc sometimes decides not to inline them. */
#undef memcpy #undef memcpy
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <asm/hardirq.h> #include <asm/hardirq.h>
#include <asm/smp.h> #include <asm/smp.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/proto.h>
extern void die(const char *,struct pt_regs *,long); extern void die(const char *,struct pt_regs *,long);
...@@ -211,6 +212,15 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) ...@@ -211,6 +212,15 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
/* User mode accesses just cause a SIGSEGV */ /* User mode accesses just cause a SIGSEGV */
if (error_code & 4) { if (error_code & 4) {
#ifdef CONFIG_IA32_EMULATION
/* 32bit vsyscall. map on demand. */
if (test_thread_flag(TIF_IA32) &&
address >= 0xffffe000 && address < 0xffffefff-7) {
if (map_syscall32(mm, address) < 0)
goto out_of_memory2;
return;
}
#endif
printk("%s[%d] segfault at rip:%lx rsp:%lx adr:%lx err:%lx\n", printk("%s[%d] segfault at rip:%lx rsp:%lx adr:%lx err:%lx\n",
tsk->comm, tsk->pid, regs->rip, regs->rsp, address, tsk->comm, tsk->pid, regs->rip, regs->rsp, address,
error_code); error_code);
...@@ -263,6 +273,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) ...@@ -263,6 +273,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
*/ */
out_of_memory: out_of_memory:
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
out_of_memory2:
if (current->pid == 1) { if (current->pid == 1) {
yield(); yield();
goto again; goto again;
...@@ -300,9 +311,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) ...@@ -300,9 +311,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
pmd_t *pmd; pmd_t *pmd;
pte_t *pte; pte_t *pte;
printk("vmalloc_fault err %lx addr %lx rip %lx\n",
error_code, address, regs->rip);
/* /*
* x86-64 has the same kernel 3rd level pages for all CPUs. * x86-64 has the same kernel 3rd level pages for all CPUs.
* But for vmalloc/modules the TLB synchronization works lazily, * But for vmalloc/modules the TLB synchronization works lazily,
......
menu "Profiling support"
depends on EXPERIMENTAL
config PROFILING
bool "Profiling support (EXPERIMENTAL)"
help
Say Y here to enable the extended profiling support mechanisms used
by profilers such as OProfile.
config OPROFILE
tristate "OProfile system profiling (EXPERIMENTAL)"
depends on PROFILING
help
OProfile is a profiling system capable of profiling the
whole system, including the kernel, kernel modules, libraries,
and applications.
If unsure, say N.
endmenu
#
# oprofile for x86-64.
# Just reuse the one from i386. The Hammer performance counters
# are similar to Athlon.
#
obj-$(CONFIG_OPROFILE) += oprofile.o
DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
oprof.o cpu_buffer.o buffer_sync.o \
event_buffer.o oprofile_files.o \
oprofilefs.o oprofile_stats.o )
oprofile-objs := $(DRIVER_OBJS) init.o timer_int.o
oprofile-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o
INCL := $(obj)/op_counter.h $(obj)/op_x86_model.h
$(obj)/nmi_int.c: ${INCL}
@ln -sf ../../i386/oprofile/nmi_int.c $(obj)/nmi_int.c
$(obj)/op_model_athlon.c: ${INCL}
@ln -sf ../../i386/oprofile/op_model_athlon.c $(obj)/op_model_athlon.c
$(obj)/init.c: ${INCL}
@ln -sf ../../i386/oprofile/init.c $(obj)/init.c
$(obj)/timer_int.c: ${INCL}
@ln -sf ../../i386/oprofile/timer_int.c $(obj)/timer_int.c
$(obj)/op_counter.h:
@ln -sf ../../i386/oprofile/op_counter.h $(obj)/op_counter.h
$(obj)/op_x86_model.h:
@ln -sf ../../i386/oprofile/op_x86_model.h $(obj)/op_x86_model.h
clean-files += op_x86_model.h op_counter.h timer_int.c init.c \
op_model_athlon.c nmi_int.c
...@@ -244,7 +244,7 @@ static __inline__ int constant_test_bit(int nr, const volatile void * addr) ...@@ -244,7 +244,7 @@ static __inline__ int constant_test_bit(int nr, const volatile void * addr)
return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
} }
static __inline__ int variable_test_bit(int nr, volatile void * addr) static __inline__ int variable_test_bit(int nr, volatile const void * addr)
{ {
int oldbit; int oldbit;
......
...@@ -131,7 +131,7 @@ static inline void set_tss_desc(unsigned cpu, void *addr) ...@@ -131,7 +131,7 @@ static inline void set_tss_desc(unsigned cpu, void *addr)
static inline void set_ldt_desc(unsigned cpu, void *addr, int size) static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
{ {
set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (unsigned long)addr, set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (unsigned long)addr,
DESC_LDT, size); DESC_LDT, size);
} }
......
#include <asm-generic/dma-mapping.h>
...@@ -168,9 +168,24 @@ static inline void x86_do_profile (struct pt_regs *regs) ...@@ -168,9 +168,24 @@ static inline void x86_do_profile (struct pt_regs *regs)
struct notifier_block; struct notifier_block;
#ifdef CONFIG_PROFILING
int register_profile_notifier(struct notifier_block * nb); int register_profile_notifier(struct notifier_block * nb);
int unregister_profile_notifier(struct notifier_block * nb); int unregister_profile_notifier(struct notifier_block * nb);
#else
/*
 * Stub used when CONFIG_PROFILING is not set: the notifier block is
 * ignored and the call always reports -ENOSYS.
 */
static inline int register_profile_notifier(struct notifier_block *nb)
{
	const int not_supported = -ENOSYS;

	return not_supported;
}
/*
 * Stub used when CONFIG_PROFILING is not set: the notifier block is
 * ignored and the call always reports -ENOSYS.
 */
static inline int unregister_profile_notifier(struct notifier_block *nb)
{
	const int not_supported = -ENOSYS;

	return not_supported;
}
#endif /* CONFIG_PROFILING */
#ifdef CONFIG_SMP /*more of this file should probably be ifdefed SMP */ #ifdef CONFIG_SMP /*more of this file should probably be ifdefed SMP */
static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) { static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {
if (IO_APIC_IRQ(i)) if (IO_APIC_IRQ(i))
......
...@@ -202,7 +202,7 @@ struct iovec32 { ...@@ -202,7 +202,7 @@ struct iovec32 {
int iov_len; int iov_len;
}; };
#define IA32_PAGE_OFFSET 0xffff0000 #define IA32_PAGE_OFFSET 0xffffe000
#define IA32_STACK_TOP IA32_PAGE_OFFSET #define IA32_STACK_TOP IA32_PAGE_OFFSET
#endif /* !CONFIG_IA32_SUPPORT */ #endif /* !CONFIG_IA32_SUPPORT */
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <linux/config.h> #include <linux/config.h>
#include <asm/types.h> #include <asm/types.h>
#include <asm/mpspec.h>
/* /*
* Intel IO-APIC support for SMP and UP systems. * Intel IO-APIC support for SMP and UP systems.
......
...@@ -256,7 +256,7 @@ static inline void clear_in_cr4 (unsigned long mask) ...@@ -256,7 +256,7 @@ static inline void clear_in_cr4 (unsigned long mask)
/* This decides where the kernel will search for a free chunk of vm /* This decides where the kernel will search for a free chunk of vm
* space during mmap's. * space during mmap's.
*/ */
#define TASK_UNMAPPED_32 0x40000000 #define TASK_UNMAPPED_32 0xa0000000
#define TASK_UNMAPPED_64 PAGE_ALIGN(TASK_SIZE/3) #define TASK_UNMAPPED_64 PAGE_ALIGN(TASK_SIZE/3)
#define TASK_UNMAPPED_BASE \ #define TASK_UNMAPPED_BASE \
(test_thread_flag(TIF_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64) (test_thread_flag(TIF_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64)
......
...@@ -42,6 +42,8 @@ extern void exception_table_check(void); ...@@ -42,6 +42,8 @@ extern void exception_table_check(void);
extern int acpi_boot_init(char *); extern int acpi_boot_init(char *);
extern int map_syscall32(struct mm_struct *mm, unsigned long address);
#define round_up(x,y) (((x) + (y) - 1) & ~((y)-1)) #define round_up(x,y) (((x) + (y) - 1) & ~((y)-1))
#define round_down(x,y) ((x) & ~((y)-1)) #define round_down(x,y) ((x) & ~((y)-1))
......
...@@ -48,8 +48,8 @@ typedef struct { ...@@ -48,8 +48,8 @@ typedef struct {
"js 2f\n" \ "js 2f\n" \
LOCK_SECTION_START("") \ LOCK_SECTION_START("") \
"2:\t" \ "2:\t" \
"cmpb $0,%0\n\t" \
"rep;nop\n\t" \ "rep;nop\n\t" \
"cmpb $0,%0\n\t" \
"jle 2b\n\t" \ "jle 2b\n\t" \
"jmp 1b\n" \ "jmp 1b\n" \
LOCK_SECTION_END LOCK_SECTION_END
......
...@@ -254,7 +254,12 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, ...@@ -254,7 +254,12 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
*/ */
#define mb() asm volatile("mfence":::"memory") #define mb() asm volatile("mfence":::"memory")
#define rmb() asm volatile("lfence":::"memory") #define rmb() asm volatile("lfence":::"memory")
#define wmb() asm volatile("sfence":::"memory")
/* could use SFENCE here, but it would be only needed for unordered SSE
store instructions and we always do an explicit sfence with them currently.
the ordering of normal stores is serialized enough. Just make it a compile
barrier. */
#define wmb() asm volatile("" ::: "memory")
#define read_barrier_depends() do {} while(0) #define read_barrier_depends() do {} while(0)
#define set_mb(var, value) do { xchg(&var, value); } while (0) #define set_mb(var, value) do { xchg(&var, value); } while (0)
#define set_wmb(var, value) do { var = value; wmb(); } while (0) #define set_wmb(var, value) do { var = value; wmb(); } while (0)
......
...@@ -100,6 +100,7 @@ static inline struct thread_info *stack_thread_info(void) ...@@ -100,6 +100,7 @@ static inline struct thread_info *stack_thread_info(void)
#define TIF_NOTIFY_RESUME 1 /* resumption notification requested */ #define TIF_NOTIFY_RESUME 1 /* resumption notification requested */
#define TIF_SIGPENDING 2 /* signal pending */ #define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
#define TIF_USEDFPU 16 /* FPU was used by this task this quantum */ #define TIF_USEDFPU 16 /* FPU was used by this task this quantum */
#define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_IA32 18 /* 32bit process */ #define TIF_IA32 18 /* 32bit process */
...@@ -107,6 +108,7 @@ static inline struct thread_info *stack_thread_info(void) ...@@ -107,6 +108,7 @@ static inline struct thread_info *stack_thread_info(void)
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_USEDFPU (1<<TIF_USEDFPU) #define _TIF_USEDFPU (1<<TIF_USEDFPU)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
......
...@@ -103,7 +103,8 @@ extern void __get_user_8(void); ...@@ -103,7 +103,8 @@ extern void __get_user_8(void);
/* Careful: we have to cast the result to the type of the pointer for sign reasons */ /* Careful: we have to cast the result to the type of the pointer for sign reasons */
#define get_user(x,ptr) \ #define get_user(x,ptr) \
({ long __ret_gu,__val_gu; \ ({ long __val_gu; \
int __ret_gu; \
switch(sizeof (*(ptr))) { \ switch(sizeof (*(ptr))) { \
case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break; \ case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break; \
case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break; \ case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break; \
...@@ -138,7 +139,7 @@ extern void __put_user_bad(void); ...@@ -138,7 +139,7 @@ extern void __put_user_bad(void);
#define __put_user_nocheck(x,ptr,size) \ #define __put_user_nocheck(x,ptr,size) \
({ \ ({ \
long __pu_err; \ int __pu_err; \
__put_user_size((x),(ptr),(size),__pu_err); \ __put_user_size((x),(ptr),(size),__pu_err); \
__pu_err; \ __pu_err; \
}) })
...@@ -146,7 +147,7 @@ extern void __put_user_bad(void); ...@@ -146,7 +147,7 @@ extern void __put_user_bad(void);
#define __put_user_check(x,ptr,size) \ #define __put_user_check(x,ptr,size) \
({ \ ({ \
long __pu_err = -EFAULT; \ int __pu_err = -EFAULT; \
__typeof__(*(ptr)) *__pu_addr = (ptr); \ __typeof__(*(ptr)) *__pu_addr = (ptr); \
if (access_ok(VERIFY_WRITE,__pu_addr,size)) \ if (access_ok(VERIFY_WRITE,__pu_addr,size)) \
__put_user_size((x),__pu_addr,(size),__pu_err); \ __put_user_size((x),__pu_addr,(size),__pu_err); \
...@@ -157,10 +158,10 @@ extern void __put_user_bad(void); ...@@ -157,10 +158,10 @@ extern void __put_user_bad(void);
do { \ do { \
retval = 0; \ retval = 0; \
switch (size) { \ switch (size) { \
case 1: __put_user_asm(x,ptr,retval,"b","b","iq"); break; \ case 1: __put_user_asm(x,ptr,retval,"b","b","iq",-EFAULT); break;\
case 2: __put_user_asm(x,ptr,retval,"w","w","ir"); break; \ case 2: __put_user_asm(x,ptr,retval,"w","w","ir",-EFAULT); break;\
case 4: __put_user_asm(x,ptr,retval,"l","k","ir"); break; \ case 4: __put_user_asm(x,ptr,retval,"l","k","ir",-EFAULT); break;\
case 8: __put_user_asm(x,ptr,retval,"q","","ir"); break; \ case 8: __put_user_asm(x,ptr,retval,"q","","ir",-EFAULT); break;\
default: __put_user_bad(); \ default: __put_user_bad(); \
} \ } \
} while (0) } while (0)
...@@ -174,12 +175,12 @@ struct __large_struct { unsigned long buf[100]; }; ...@@ -174,12 +175,12 @@ struct __large_struct { unsigned long buf[100]; };
* we do not write to any memory gcc knows about, so there are no * we do not write to any memory gcc knows about, so there are no
* aliasing issues. * aliasing issues.
*/ */
#define __put_user_asm(x, addr, err, itype, rtype, ltype) \ #define __put_user_asm(x, addr, err, itype, rtype, ltype, errno) \
__asm__ __volatile__( \ __asm__ __volatile__( \
"1: mov"itype" %"rtype"1,%2\n" \ "1: mov"itype" %"rtype"1,%2\n" \
"2:\n" \ "2:\n" \
".section .fixup,\"ax\"\n" \ ".section .fixup,\"ax\"\n" \
"3: movq %3,%0\n" \ "3: mov %3,%0\n" \
" jmp 2b\n" \ " jmp 2b\n" \
".previous\n" \ ".previous\n" \
".section __ex_table,\"a\"\n" \ ".section __ex_table,\"a\"\n" \
...@@ -187,32 +188,33 @@ struct __large_struct { unsigned long buf[100]; }; ...@@ -187,32 +188,33 @@ struct __large_struct { unsigned long buf[100]; };
" .quad 1b,3b\n" \ " .quad 1b,3b\n" \
".previous" \ ".previous" \
: "=r"(err) \ : "=r"(err) \
: ltype (x), "m"(__m(addr)), "i"(-EFAULT), "0"(err)) : ltype (x), "m"(__m(addr)), "i"(errno), "0"(err))
#define __get_user_nocheck(x,ptr,size) \ #define __get_user_nocheck(x,ptr,size) \
({ \ ({ \
long __gu_err, __gu_val; \ int __gu_err; \
long __gu_val; \
__get_user_size(__gu_val,(ptr),(size),__gu_err); \ __get_user_size(__gu_val,(ptr),(size),__gu_err); \
(x) = (__typeof__(*(ptr)))__gu_val; \ (x) = (__typeof__(*(ptr)))__gu_val; \
__gu_err; \ __gu_err; \
}) })
extern long __get_user_bad(void); extern int __get_user_bad(void);
/*
 * __get_user_size - dispatch a user-space load on the access width.
 *
 * @x:      lvalue receiving the loaded value
 * @ptr:    source address
 * @size:   access width in bytes; 1, 2, 4 and 8 are handled
 * @retval: lvalue receiving the status; cleared to 0 before the access
 *
 * Each supported width expands to a single __get_user_asm() with the
 * matching instruction suffix / register modifier and -EFAULT as the
 * value handed to the fault path. Any other width ends up calling
 * __get_user_bad(), which is only declared here (presumably left
 * undefined on purpose so that a bad size shows up at link time --
 * TODO confirm).
 */
#define __get_user_size(x,ptr,size,retval)				\
do {									\
	retval = 0;							\
	switch (size) {							\
	case 1: __get_user_asm(x,ptr,retval,"b","b","=q",-EFAULT); break;\
	case 2: __get_user_asm(x,ptr,retval,"w","w","=r",-EFAULT); break;\
	case 4: __get_user_asm(x,ptr,retval,"l","k","=r",-EFAULT); break;\
	case 8: __get_user_asm(x,ptr,retval,"q","","=r",-EFAULT); break;\
	default: (x) = __get_user_bad();				\
	}								\
} while (0)
#define __get_user_asm(x, addr, err, itype, rtype, ltype) \ #define __get_user_asm(x, addr, err, itype, rtype, ltype, errno) \
__asm__ __volatile__( \ __asm__ __volatile__( \
"1: mov"itype" %2,%"rtype"1\n" \ "1: mov"itype" %2,%"rtype"1\n" \
"2:\n" \ "2:\n" \
...@@ -226,23 +228,77 @@ do { \ ...@@ -226,23 +228,77 @@ do { \
" .quad 1b,3b\n" \ " .quad 1b,3b\n" \
".previous" \ ".previous" \
: "=r"(err), ltype (x) \ : "=r"(err), ltype (x) \
: "m"(__m(addr)), "i"(-EFAULT), "0"(err)) : "m"(__m(addr)), "i"(errno), "0"(err))
/* /*
* Copy To/From Userspace * Copy To/From Userspace
*
* This relies on an optimized common worker function.
*
* Could do special inline versions for small constant copies, but avoid this
* for now. It's not clear it is worth it.
*/ */
/* Handles exceptions in both to and from, but doesn't do access_ok */
extern unsigned long copy_user_generic(void *to, const void *from, unsigned len); extern unsigned long copy_user_generic(void *to, const void *from, unsigned len);
extern unsigned long copy_to_user(void *to, const void *from, unsigned len); extern unsigned long copy_to_user(void *to, const void *from, unsigned len);
extern unsigned long copy_from_user(void *to, const void *from, unsigned len); extern unsigned long copy_from_user(void *to, const void *from, unsigned len);
#define __copy_to_user copy_user_generic
#define __copy_from_user copy_user_generic static inline int __copy_from_user(void *dst, void *src, unsigned size)
{
if (!__builtin_constant_p(size))
return copy_user_generic(dst,src,size);
int ret = 0;
switch (size) {
case 1:__get_user_asm(*(u8*)dst,(u8 *)src,ret,"b","b","=q",1);
return ret;
case 2:__get_user_asm(*(u16*)dst,(u16*)src,ret,"w","w","=r",2);
return ret;
case 4:__get_user_asm(*(u32*)dst,(u32*)src,ret,"l","k","=r",4);
return ret;
case 8:__get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",8);
return ret;
case 10:
__get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",16);
if (ret) return ret;
__get_user_asm(*(u16*)(8+dst),(u16*)(8+src),ret,"w","w","=r",2);
return ret;
case 16:
__get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",16);
if (ret) return ret;
__get_user_asm(*(u64*)(8+dst),(u64*)(8+src),ret,"q","","=r",8);
return ret;
default:
return copy_user_generic(dst,src,size);
}
}
/*
 * __copy_to_user - copy @size bytes from @src to user pointer @dst.
 *
 * Compile-time constant sizes of 1, 2, 4, 8, 10 and 16 bytes are emitted
 * inline as one or two __put_user_asm() stores; anything else goes through
 * copy_user_generic(). The last __put_user_asm() argument is the value the
 * fault path stores into the status variable, and each call passes the
 * number of bytes not yet written at that point. The result is therefore 0
 * when no store faulted.
 *
 * No address range check is done here.
 */
static inline int __copy_to_user(void *dst, void *src, unsigned size)
{
	int left = 0;

	/* Only constant sizes can be resolved to an inline store sequence. */
	if (!__builtin_constant_p(size))
		return copy_user_generic(dst, src, size);

	switch (size) {
	case 1:
		__put_user_asm(*(u8 *)src, (u8 *)dst, left, "b", "b", "iq", 1);
		break;
	case 2:
		__put_user_asm(*(u16 *)src, (u16 *)dst, left, "w", "w", "ir", 2);
		break;
	case 4:
		__put_user_asm(*(u32 *)src, (u32 *)dst, left, "l", "k", "ir", 4);
		break;
	case 8:
		__put_user_asm(*(u64 *)src, (u64 *)dst, left, "q", "", "ir", 8);
		break;
	case 10:
		/* 8 bytes first, then the trailing 2 bytes at offset 8. */
		__put_user_asm(*(u64 *)src, (u64 *)dst, left, "q", "", "ir", 10);
		if (left)
			break;
		/* Empty asm with a "memory" clobber: compiler barrier between the stores. */
		asm("":::"memory");
		__put_user_asm(((u16 *)src)[4], (u16 *)dst + 4, left, "w", "w", "ir", 2);
		break;
	case 16:
		/* Two 8-byte stores, low half first. */
		__put_user_asm(*(u64 *)src, (u64 *)dst, left, "q", "", "ir", 16);
		if (left)
			break;
		/* Empty asm with a "memory" clobber: compiler barrier between the stores. */
		asm("":::"memory");
		__put_user_asm(((u64 *)src)[1], (u64 *)dst + 1, left, "q", "", "ir", 8);
		break;
	default:
		return copy_user_generic(dst, src, size);
	}

	return left;
}
long strncpy_from_user(char *dst, const char *src, long count); long strncpy_from_user(char *dst, const char *src, long count);
long __strncpy_from_user(char *dst, const char *src, long count); long __strncpy_from_user(char *dst, const char *src, long count);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment