Commit b068ec41 authored by Andi Kleen, committed by Linus Torvalds

[PATCH] x86-64 merge

x86_64 core updates.

 - Make it compile again (switch_to macros etc., add dummy suspend.h)
 - Reenable the strength-reduce optimization
 - Fix ramdisk (patch from Mikael Pettersson)
 - Some merges from i386
 - Reimplement lazy IO bitmap allocation, based on bcrl's idea
   (a small illustrative sketch follows the commit metadata below).
 - Fix 32-bit IPC emulation to actually work and move it into its own file
 - New fixed mtrr.c from DaveJ, ported from 2.4; reenable MTRR support.
 - Move tlbstate into PDA.
 - Add some changes that got lost during the last merge.
 - New memset that seems to actually work.
 - Align signal handler stack frames to 16 bytes.
 - Some more minor bugfixes.
parent 9343c8e2
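The lazy IO bitmap change is the core of the ioperm()-related hunks below: the per-thread bitmap becomes a kmalloc'd pointer that is allocated only on the first ioperm() call, freed in exit_thread(), and copied in copy_thread(). The following is a minimal user-space sketch of that allocation idea only; the struct layout, the malloc/memset stand-ins for the kernel allocators, and the error convention are illustrative assumptions, not the kernel code itself (that is in the ioport.c and process.c hunks further down).

#include <stdlib.h>
#include <string.h>

/* Illustrative stand-in for the kernel's IO_BITMAP_SIZE: 32 32-bit words
   give 1024 bits, one per x86 I/O port reachable through ioperm(). */
#define IO_BITMAP_SIZE 32

struct thread_struct {
	unsigned int *io_bitmap_ptr;	/* NULL until the task first calls ioperm() */
};

/* Allocate the per-thread IO bitmap lazily, on the first ioperm() call.
   Tasks that never use ioperm() pay neither the memory nor the
   4-cacheline copy on every context switch. */
static int io_bitmap_lazy_alloc(struct thread_struct *t)
{
	if (t->io_bitmap_ptr)
		return 0;			/* already set up */
	t->io_bitmap_ptr = malloc((IO_BITMAP_SIZE + 1) * 4);
	if (!t->io_bitmap_ptr)
		return -1;			/* the kernel returns -ENOMEM here */
	/* all bits set means "access denied" for every port by default */
	memset(t->io_bitmap_ptr, 0xff, (IO_BITMAP_SIZE + 1) * 4);
	return 0;
}

The kernel-side counterpart is the `if (!t->io_bitmap_ptr)` block in the sys_ioperm() hunk below; exit_thread() frees the bitmap and resets the TSS io_map_base to INVALID_IO_BITMAP_OFFSET so a dead task's bitmap is never consulted again.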
@@ -43,15 +43,9 @@ CFLAGS += -mcmodel=kernel
 CFLAGS += -pipe
 # this makes reading assembly source easier
 CFLAGS += -fno-reorder-blocks
-# needed for later gcc 3.1
 CFLAGS += -finline-limit=2000
-# needed for earlier gcc 3.1
-#CFLAGS += -fno-strength-reduce
 #CFLAGS += -g
-# prevent gcc from keeping the stack 16 byte aligned (FIXME)
-#CFLAGS += -mpreferred-stack-boundary=2
 HEAD := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
 SUBDIRS := arch/x86_64/tools $(SUBDIRS) arch/x86_64/kernel arch/x86_64/mm arch/x86_64/lib
......
@@ -21,10 +21,6 @@ ROOT_DEV := CURRENT
 SVGA_MODE := -DSVGA_MODE=NORMAL_VGA
-# If you want the RAM disk device, define this to be the size in blocks.
-RAMDISK := -DRAMDISK=512
 # ---------------------------------------------------------------------------
 BOOT_INCL = $(TOPDIR)/include/linux/config.h \
......
@@ -47,8 +47,7 @@ define_bool CONFIG_EISA n
 define_bool CONFIG_X86_IO_APIC y
 define_bool CONFIG_X86_LOCAL_APIC y
-#currently broken: bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
-#bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
+bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
 bool 'Symmetric multi-processing support' CONFIG_SMP
 if [ "$CONFIG_SMP" = "n" ]; then
 bool 'Preemptible Kernel' CONFIG_PREEMPT
@@ -226,6 +225,7 @@ if [ "$CONFIG_DEBUG_KERNEL" != "n" ]; then
 bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK
 bool ' Additional run-time checks' CONFIG_CHECKING
 bool ' Debug __init statements' CONFIG_INIT_DEBUG
+bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK
 fi
 endmenu
......
@@ -9,8 +9,9 @@ export-objs := ia32_ioctl.o sys_ia32.o
 all: ia32.o
 O_TARGET := ia32.o
-obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_ioctl.o ia32_signal.o \
-	ia32_binfmt.o fpu32.o socket32.o ptrace32.o
+obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_ioctl.o \
+	ia32_signal.o \
+	ia32_binfmt.o fpu32.o socket32.o ptrace32.o ipc32.o
 clean::
......
@@ -14,6 +14,7 @@
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/stddef.h>
+#include <linux/slab.h>
 /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
 static void set_bitmap(unsigned long *bitmap, short base, short extent, int new_value)
@@ -61,27 +62,19 @@ asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on)
 		return -EINVAL;
 	if (turn_on && !capable(CAP_SYS_RAWIO))
 		return -EPERM;
-	/*
-	 * If it's the first ioperm() call in this thread's lifetime, set the
-	 * IO bitmap up. ioperm() is much less timing critical than clone(),
-	 * this is why we delay this operation until now:
-	 */
-	if (!t->ioperm) {
-		/*
-		 * just in case ...
-		 */
-		memset(t->io_bitmap,0xff,(IO_BITMAP_SIZE+1)*4);
-		t->ioperm = 1;
-		/*
-		 * this activates it in the TSS
-		 */
+	if (!t->io_bitmap_ptr) {
+		t->io_bitmap_ptr = kmalloc((IO_BITMAP_SIZE+1)*4, GFP_KERNEL);
+		if (!t->io_bitmap_ptr)
+			return -ENOMEM;
+		memset(t->io_bitmap_ptr,0xff,(IO_BITMAP_SIZE+1)*4);
 		tss->io_map_base = IO_BITMAP_OFFSET;
 	}
 	/*
 	 * do it in the per-thread copy and in the TSS ...
 	 */
-	set_bitmap((unsigned long *) t->io_bitmap, from, num, !turn_on);
+	set_bitmap((unsigned long *) t->io_bitmap_ptr, from, num, !turn_on);
 	set_bitmap((unsigned long *) tss->io_bitmap, from, num, !turn_on);
 	return 0;
......
@@ -39,6 +39,7 @@
 #include <linux/reboot.h>
 #include <linux/init.h>
 #include <linux/ctype.h>
+#include <linux/slab.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -320,9 +321,6 @@ void show_regs(struct pt_regs * regs)
 	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
 }
-#define __STR(x) #x
-#define __STR2(x) __STR(x)
 extern void load_gs_index(unsigned);
 /*
@@ -330,7 +328,13 @@ extern void load_gs_index(unsigned);
  */
 void exit_thread(void)
 {
-	/* nothing to do ... */
+	struct task_struct *me = current;
+	if (me->thread.io_bitmap_ptr) {
+		kfree(me->thread.io_bitmap_ptr);
+		me->thread.io_bitmap_ptr = NULL;
+		(init_tss + smp_processor_id())->io_map_base =
+			INVALID_IO_BITMAP_OFFSET;
+	}
 }
 void flush_thread(void)
@@ -392,6 +396,14 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
 	unlazy_fpu(current);
 	p->thread.i387 = current->thread.i387;
+	if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
+		p->thread.io_bitmap_ptr = kmalloc((IO_BITMAP_SIZE+1)*4, GFP_KERNEL);
+		if (!p->thread.io_bitmap_ptr)
+			return -ENOMEM;
+		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
+		       (IO_BITMAP_SIZE+1)*4);
+	}
 	return 0;
 }
@@ -491,21 +503,14 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/*
 	 * Handle the IO bitmap
 	 */
-	if (unlikely(prev->ioperm || next->ioperm)) {
-		if (next->ioperm) {
+	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
+		if (next->io_bitmap_ptr) {
 			/*
 			 * 4 cachelines copy ... not good, but not that
 			 * bad either. Anyone got something better?
 			 * This only affects processes which use ioperm().
-			 * [Putting the TSSs into 4k-tlb mapped regions
-			 * and playing VM tricks to switch the IO bitmap
-			 * is not really acceptable.]
-			 * On x86-64 we could put multiple bitmaps into
-			 * the GDT and just switch offsets
-			 * This would require ugly special cases on overflow
-			 * though -AK
 			 */
-			memcpy(tss->io_bitmap, next->io_bitmap,
+			memcpy(tss->io_bitmap, next->io_bitmap_ptr,
 			       IO_BITMAP_SIZE*sizeof(u32));
 			tss->io_map_base = IO_BITMAP_OFFSET;
 		} else {
......
@@ -91,6 +91,9 @@ void pda_init(int cpu)
 	pda->me = pda;
 	pda->cpudata_offset = 0;
+	pda->active_mm = &init_mm;
+	pda->mmu_state = 0;
 	asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
 	wrmsrl(MSR_GS_BASE, cpu_pda + cpu);
 }
......
@@ -84,7 +84,6 @@ struct rt_sigframe
 	char *pretcode;
 	struct ucontext uc;
 	struct siginfo info;
-	struct _fpstate fpstate;
 };
 static int
@@ -186,8 +185,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs regs)
  */
 static int
-setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate,
-		struct pt_regs *regs, unsigned long mask)
+setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs, unsigned long mask)
 {
 	int tmp, err = 0;
 	struct task_struct *me = current;
@@ -221,20 +219,17 @@ setup_sigcontext(struct sigcontext *sc, struct _fpstate *fpstate,
 	err |= __put_user(mask, &sc->oldmask);
 	err |= __put_user(me->thread.cr2, &sc->cr2);
-	tmp = save_i387(fpstate);
-	if (tmp < 0)
-		err = 1;
-	else
-		err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
 	return err;
 }
 /*
  * Determine which stack to use..
  */
-static inline struct rt_sigframe *
-get_sigframe(struct k_sigaction *ka, struct pt_regs * regs)
+#define round_down(p, r) ((void *) ((unsigned long)((p) - (r) + 1) & ~((r)-1)))
+static void *
+get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
 {
 	unsigned long rsp;
@@ -247,22 +242,34 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs)
 		rsp = current->sas_ss_sp + current->sas_ss_size;
 	}
-	rsp = (rsp - sizeof(struct _fpstate)) & ~(15UL);
-	rsp -= offsetof(struct rt_sigframe, fpstate);
-	return (struct rt_sigframe *) rsp;
+	return round_down(rsp - size, 16);
 }
 static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 			   sigset_t *set, struct pt_regs * regs)
 {
-	struct rt_sigframe *frame;
+	struct rt_sigframe *frame = NULL;
+	struct _fpstate *fp = NULL;
 	int err = 0;
-	frame = get_sigframe(ka, regs);
-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
+	if (current->used_math) {
+		fp = get_stack(ka, regs, sizeof(struct _fpstate));
+		frame = round_down((char *)fp - sizeof(struct rt_sigframe), 16) - 8;
+		if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) {
+			goto give_sigsegv;
+		}
+		if (save_i387(fp) < 0)
+			err |= -1;
+	}
+	if (!frame)
+		frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
+	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) {
+		goto give_sigsegv;
+	}
 	if (ka->sa.sa_flags & SA_SIGINFO) {
 		err |= copy_siginfo_to_user(&frame->info, info);
@@ -278,14 +285,10 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	err |= __put_user(sas_ss_flags(regs->rsp),
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
-				regs, set->sig[0]);
+	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0]);
+	err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate);
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-	if (err) {
-		goto give_sigsegv;
-	}
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
 	/* x86-64 should always use SA_RESTORER. */
@@ -297,7 +300,6 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	}
 	if (err) {
-		printk("fault 3\n");
 		goto give_sigsegv;
 	}
@@ -305,7 +307,6 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	printk("%d old rip %lx old rsp %lx old rax %lx\n", current->pid,regs->rip,regs->rsp,regs->rax);
 #endif
 	/* Set up registers for signal handler */
 	{
 		struct exec_domain *ed = current_thread_info()->exec_domain;
@@ -320,9 +321,10 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	   next argument after the signal number on the stack. */
 	regs->rsi = (unsigned long)&frame->info;
 	regs->rdx = (unsigned long)&frame->uc;
-	regs->rsp = (unsigned long) frame;
 	regs->rip = (unsigned long) ka->sa.sa_handler;
+	regs->rsp = (unsigned long)frame;
 	set_fs(USER_DS);
 	regs->eflags &= ~TF_MASK;
......
@@ -25,8 +25,6 @@
 /* The 'big kernel lock' */
 spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
-struct tlb_state cpu_tlbstate[NR_CPUS] = {[0 ... NR_CPUS-1] = { &init_mm, 0 }};
 /*
  * the following functions deal with sending IPIs between CPUs.
  *
@@ -147,9 +145,9 @@ static spinlock_t tlbstate_lock = SPIN_LOCK_UNLOCKED;
  */
 static void inline leave_mm (unsigned long cpu)
 {
-	if (cpu_tlbstate[cpu].state == TLBSTATE_OK)
+	if (read_pda(mmu_state) == TLBSTATE_OK)
 		BUG();
-	clear_bit(cpu, &cpu_tlbstate[cpu].active_mm->cpu_vm_mask);
+	clear_bit(cpu, &read_pda(active_mm)->cpu_vm_mask);
 	__flush_tlb();
 }
@@ -164,18 +162,18 @@ static void inline leave_mm (unsigned long cpu)
 * the other cpus, but smp_invalidate_interrupt ignore flush ipis
 * for the wrong mm, and in the worst case we perform a superflous
 * tlb flush.
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
+ * 1a2) set cpu mmu_state to TLBSTATE_OK
 *	Now the smp_invalidate_interrupt won't call leave_mm if cpu0
 *	was in lazy tlb mode.
- * 1a3) update cpu_tlbstate[].active_mm
+ * 1a3) update cpu active_mm
 *	Now cpu0 accepts tlb flushes for the new mm.
 * 1a4) set_bit(cpu, &new_mm->cpu_vm_mask);
 *	Now the other cpus will send tlb flush ipis.
 * 1a4) change cr3.
 * 1b) thread switch without mm change
- *	cpu_tlbstate[].active_mm is correct, cpu0 already handles
+ *	cpu active_mm is correct, cpu0 already handles
 *	flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
+ * 1b1) set cpu mmu_state to TLBSTATE_OK
 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
 *	Atomically set the bit [other cpus will start sending flush ipis],
 *	and test the bit.
@@ -188,7 +186,7 @@ static void inline leave_mm (unsigned long cpu)
 * runs in kernel space, the cpu could load tlb entries for user space
 * pages.
 *
- * The good news is that cpu_tlbstate is local to each cpu, no
+ * The good news is that cpu mmu_state is local to each cpu, no
 * write/read ordering problems.
 */
@@ -216,8 +214,8 @@ asmlinkage void smp_invalidate_interrupt (void)
 	 * BUG();
 	 */
-	if (flush_mm == cpu_tlbstate[cpu].active_mm) {
-		if (cpu_tlbstate[cpu].state == TLBSTATE_OK) {
+	if (flush_mm == read_pda(active_mm)) {
+		if (read_pda(mmu_state) == TLBSTATE_OK) {
 			if (flush_va == FLUSH_ALL)
 				local_flush_tlb();
 			else
@@ -335,7 +333,7 @@ static inline void do_flush_tlb_all_local(void)
 	unsigned long cpu = smp_processor_id();
 	__flush_tlb_all();
-	if (cpu_tlbstate[cpu].state == TLBSTATE_LAZY)
+	if (read_pda(mmu_state) == TLBSTATE_LAZY)
 		leave_mm(cpu);
 }
......
@@ -47,7 +47,7 @@
 #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
-#define NO_VSYSCALL 1
+//#define NO_VSYSCALL 1
 #ifdef NO_VSYSCALL
 #include <asm/unistd.h>
......
@@ -189,3 +189,5 @@ EXPORT_SYMBOL_NOVERS(do_softirq_thunk);
 void out_of_line_bug(void);
 EXPORT_SYMBOL(out_of_line_bug);
+EXPORT_SYMBOL(init_level4_pgt);
@@ -12,7 +12,7 @@ obj-y = csum-partial.o csum-copy.o csum-wrappers.o delay.o \
 	thunk.o io.o clear_page.o copy_page.o
 obj-y += memcpy.o
 obj-y += memmove.o
-#obj-y += memset.o
+obj-y += memset.o
 obj-y += copy_user.o
 export-objs := io.o csum-wrappers.o csum-partial.o
......
-/* Copyright 2002 Andi Kleen, SuSE Labs */
+/* Copyright 2002 Andi Kleen */
-// #define FIX_ALIGNMENT 1
 /*
  * ISO C memset - set a memory block to a byte value.
@@ -11,51 +9,51 @@
  *
  * rax   original destination
  */
-	.globl ____memset
+	.globl __memset
+	.globl memset
 	.p2align
-____memset:
-	movq %rdi,%r10		/* save destination for return address */
-	movq %rdx,%r11		/* save count */
+memset:
+__memset:
+	movq %rdi,%r10
+	movq %rdx,%r11
 	/* expand byte value */
-	movzbl %sil,%ecx	/* zero extend char value */
-	movabs $0x0101010101010101,%rax	/* expansion pattern */
-	mul %rcx		/* expand with rax, clobbers rdx */
+	movzbl %sil,%ecx
+	movabs $0x0101010101010101,%rax
+	mul %rcx		/* with rax, clobbers rdx */
-#ifdef FIX_ALIGNMENT
 	/* align dst */
 	movl %edi,%r9d
-	andl $7,%r9d		/* test unaligned bits */
+	andl $7,%r9d
 	jnz bad_alignment
 after_bad_alignment:
-#endif
-	movq %r11,%rcx		/* restore count */
-	shrq $6,%rcx		/* divide by 64 */
-	jz handle_tail		/* block smaller than 64 bytes? */
-	movl $64,%r8d		/* CSE loop block size */
+	movq %r11,%rcx
+	movl $64,%r8d
+	shrq $6,%rcx
+	jz handle_tail
 loop_64:
-	movnti %rax,0*8(%rdi)
-	movnti %rax,1*8(%rdi)
-	movnti %rax,2*8(%rdi)
-	movnti %rax,3*8(%rdi)
-	movnti %rax,4*8(%rdi)
-	movnti %rax,5*8(%rdi)
-	movnti %rax,6*8(%rdi)
-	movnti %rax,7*8(%rdi)	/* clear 64 byte blocks */
-	addq %r8,%rdi		/* increase pointer by 64 bytes */
-	loop loop_64		/* decrement rcx and if not zero loop */
+	movnti %rax,(%rdi)
+	movnti %rax,8(%rdi)
+	movnti %rax,16(%rdi)
+	movnti %rax,24(%rdi)
+	movnti %rax,32(%rdi)
+	movnti %rax,40(%rdi)
+	movnti %rax,48(%rdi)
+	movnti %rax,56(%rdi)
+	addq %r8,%rdi
+	loop loop_64
 	/* Handle tail in loops. The loops should be faster than hard
 	   to predict jump tables. */
handle_tail:
 	movl %r11d,%ecx
-	andl $63,%ecx
+	andl $63&(~7),%ecx
+	shrl $3,%ecx
 	jz handle_7
-	shrl $3,%ecx
loop_8:
-	movnti %rax,(%rdi)	/* long words */
+	movnti %rax,(%rdi)
 	addq $8,%rdi
 	loop loop_8
@@ -64,22 +62,20 @@ handle_7:
 	andl $7,%ecx
 	jz ende
loop_1:
-	movb %al,(%rdi)		/* bytes */
-	incq %rdi
+	movb %al,(%rdi)
+	addq $1,%rdi
 	loop loop_1
ende:
 	movq %r10,%rax
 	ret
-#ifdef FIX_ALIGNMENT
bad_alignment:
-	andq $-8,%r11		/* shorter than 8 bytes */
-	jz handle_7		/* if yes handle it in the tail code */
-	movnti %rax,(%rdi)	/* unaligned store of 8 bytes */
+	cmpq $7,%r11
+	jbe handle_7
+	movnti %rax,(%rdi)	/* unaligned store */
 	movq $8,%r8
-	subq %r9,%r8		/* compute alignment (8-misalignment) */
-	addq %r8,%rdi		/* fix destination */
-	subq %r8,%r11		/* fix count */
+	subq %r9,%r8
+	addq %r8,%rdi
+	subq %r8,%r11
 	jmp after_bad_alignment
-#endif
@@ -16,11 +16,22 @@
 #include <asm/processor.h>
 #include <asm/sigcontext.h>
 #include <asm/user.h>
+#include <asm/thread_info.h>
 extern void fpu_init(void);
 extern void init_fpu(void);
 int save_i387(struct _fpstate *buf);
+static inline int need_signal_i387(struct task_struct *me)
+{
+	if (!me->used_math)
+		return 0;
+	me->used_math = 0;
+	if (!test_thread_flag(TIF_USEDFPU))
+		return 0;
+	return 1;
+}
 /*
  * FPU lazy state save handling...
  */
......
@@ -18,7 +18,9 @@ typedef int __kernel_clock_t32;
 typedef int __kernel_pid_t32;
 typedef unsigned short __kernel_ipc_pid_t32;
 typedef unsigned short __kernel_uid_t32;
+typedef unsigned __kernel_uid32_t32;
 typedef unsigned short __kernel_gid_t32;
+typedef unsigned __kernel_gid32_t32;
 typedef unsigned short __kernel_dev_t32;
 typedef unsigned int __kernel_ino_t32;
 typedef unsigned short __kernel_mode_t32;
......
 #ifndef __i386_IPC_H__
 #define __i386_IPC_H__
-/*
- * These are used to wrap system calls on x86.
- *
- * See arch/i386/kernel/sys_i386.c for ugly details..
- *
- * (on x86-64 only used for 32bit emulation)
- */
-struct ipc_kludge {
-	struct msgbuf *msgp;
-	long msgtyp;
-};
-#define SEMOP 1
-#define SEMGET 2
-#define SEMCTL 3
-#define MSGSND 11
-#define MSGRCV 12
-#define MSGGET 13
-#define MSGCTL 14
-#define SHMAT 21
-#define SHMDT 22
-#define SHMGET 23
-#define SHMCTL 24
-/* Used by the DIPC package, try and avoid reusing it */
-#define DIPC 25
-#define IPCCALL(version,op) ((version)<<16 | (op))
+/* dummy */
 #endif
@@ -19,8 +19,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu)
 {
-	if(cpu_tlbstate[cpu].state == TLBSTATE_OK)
-		cpu_tlbstate[cpu].state = TLBSTATE_LAZY;
+	if (read_pda(mmu_state) == TLBSTATE_OK)
+		write_pda(mmu_state, TLBSTATE_LAZY);
 }
 #else
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu)
@@ -35,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 		/* stop flush ipis for the previous mm */
 		clear_bit(cpu, &prev->cpu_vm_mask);
 #ifdef CONFIG_SMP
-		cpu_tlbstate[cpu].state = TLBSTATE_OK;
-		cpu_tlbstate[cpu].active_mm = next;
+		write_pda(mmu_state, TLBSTATE_OK);
+		write_pda(active_mm, next);
 #endif
 		set_bit(cpu, &next->cpu_vm_mask);
 		/* Re-load page tables */
@@ -48,8 +48,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	}
 #ifdef CONFIG_SMP
 	else {
-		cpu_tlbstate[cpu].state = TLBSTATE_OK;
-		if(cpu_tlbstate[cpu].active_mm != next)
+		write_pda(mmu_state, TLBSTATE_OK);
+		if (read_pda(active_mm) != next)
 			out_of_line_bug();
 		if(!test_and_set_bit(cpu, &next->cpu_vm_mask)) {
 			/* We were in lazy tlb mode and leave_mm disabled
......
@@ -95,6 +95,7 @@
 #define MSR_IA32_PERFCTR0 0xc1
 #define MSR_IA32_PERFCTR1 0xc2
+#define MSR_MTRRcap 0x0fe
 #define MSR_IA32_BBL_CR_CTL 0x119
 #define MSR_IA32_MCG_CAP 0x179
@@ -110,6 +111,19 @@
 #define MSR_IA32_LASTINTFROMIP 0x1dd
 #define MSR_IA32_LASTINTTOIP 0x1de
+#define MSR_MTRRfix64K_00000 0x250
+#define MSR_MTRRfix16K_80000 0x258
+#define MSR_MTRRfix16K_A0000 0x259
+#define MSR_MTRRfix4K_C0000 0x268
+#define MSR_MTRRfix4K_C8000 0x269
+#define MSR_MTRRfix4K_D0000 0x26a
+#define MSR_MTRRfix4K_D8000 0x26b
+#define MSR_MTRRfix4K_E0000 0x26c
+#define MSR_MTRRfix4K_E8000 0x26d
+#define MSR_MTRRfix4K_F0000 0x26e
+#define MSR_MTRRfix4K_F8000 0x26f
+#define MSR_MTRRdefType 0x2ff
 #define MSR_IA32_MC0_CTL 0x400
 #define MSR_IA32_MC0_STATUS 0x401
 #define MSR_IA32_MC0_ADDR 0x402
@@ -171,11 +185,4 @@
 #define MSR_IA32_APICBASE_ENABLE (1<<11)
 #define MSR_IA32_APICBASE_BASE (0xfffff<<12)
-#define MSR_IA32_THERM_CONTROL 0x19a
-#define MSR_IA32_THERM_INTERRUPT 0x19b
-#define MSR_IA32_THERM_STATUS 0x19c
-#define MSR_IA32_MISC_ENABLE 0x1a0
 #endif
@@ -30,16 +30,16 @@
 struct mtrr_sentry
 {
-    unsigned long base;    /*  Base address     */
-    unsigned long size;    /*  Size of region   */
+    __u64 base;    /*  Base address     */
+    __u32 size;    /*  Size of region   */
     unsigned int type;     /*  Type of region   */
 };
 struct mtrr_gentry
 {
+    __u64 base;    /*  Base address     */
+    __u32 size;    /*  Size of region   */
     unsigned int regnum;   /*  Register number  */
-    unsigned long base;    /*  Base address     */
-    unsigned long size;    /*  Size of region   */
     unsigned int type;     /*  Type of region   */
 };
@@ -81,46 +81,38 @@ static char *mtrr_strings[MTRR_NUM_TYPES] =
 #ifdef __KERNEL__
 /* The following functions are for use by other drivers */
-# ifdef CONFIG_MTRR
-extern int mtrr_add (unsigned long base, unsigned long size,
-		     unsigned int type, char increment);
-extern int mtrr_add_page (unsigned long base, unsigned long size,
-		     unsigned int type, char increment);
-extern int mtrr_del (int reg, unsigned long base, unsigned long size);
-extern int mtrr_del_page (int reg, unsigned long base, unsigned long size);
-extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
-# else
-static __inline__ int mtrr_add (unsigned long base, unsigned long size,
+#ifdef CONFIG_MTRR
+extern int mtrr_add (__u64 base, __u32 size, unsigned int type, char increment);
+extern int mtrr_add_page (__u64 base, __u32 size, unsigned int type, char increment);
+extern int mtrr_del (int reg, __u64 base, __u32 size);
+extern int mtrr_del_page (int reg, __u64 base, __u32 size);
+#else
+static __inline__ int mtrr_add (__u64 base, __u32 size,
 				unsigned int type, char increment)
 {
     return -ENODEV;
 }
-static __inline__ int mtrr_add_page (unsigned long base, unsigned long size,
+static __inline__ int mtrr_add_page (__u64 base, __u32 size,
 				unsigned int type, char increment)
 {
     return -ENODEV;
 }
-static __inline__ int mtrr_del (int reg, unsigned long base,
-				unsigned long size)
+static __inline__ int mtrr_del (int reg, __u64 base, __u32 size)
 {
     return -ENODEV;
 }
-static __inline__ int mtrr_del_page (int reg, unsigned long base,
-				unsigned long size)
+static __inline__ int mtrr_del_page (int reg, __u64 base, __u32 size)
 {
     return -ENODEV;
 }
-static __inline__ void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) {;}
-# endif
+#endif
 /* The following functions are for initialisation: don't use them! */
 extern int mtrr_init (void);
-# if defined(CONFIG_SMP) && defined(CONFIG_MTRR)
+#if defined(CONFIG_SMP) && defined(CONFIG_MTRR)
 extern void mtrr_init_boot_cpu (void);
 extern void mtrr_init_secondary_cpu (void);
-# endif
+#endif
 #endif
......
@@ -22,6 +22,8 @@ struct x8664_pda {
 	unsigned int __local_bh_count;
 	unsigned int __nmi_count;	/* arch dependent */
 	struct task_struct * __ksoftirqd_task; /* waitqueue is too large */
+	struct mm_struct *active_mm;
+	int mmu_state;
 } ____cacheline_aligned;
 #define PDA_STACKOFFSET (5*8)
......
@@ -45,21 +45,12 @@ struct cpuinfo_x86 {
 	__u8	x86_vendor;	/* CPU vendor */
 	__u8	x86_model;
 	__u8	x86_mask;
-	/* We know that wp_works_ok = 1, hlt_works_ok = 1, hard_math = 1,
-	   etc... */
-	char	wp_works_ok;	/* It doesn't on 386's */
-	char	hlt_works_ok;	/* Problems on some 486Dx4's and old 386's */
-	char	hard_math;
-	char	rfu;
 	int	cpuid_level;	/* Maximum supported CPUID level, -1=no CPUID */
 	__u32	x86_capability[NCAPINTS];
 	char	x86_vendor_id[16];
 	char	x86_model_id[64];
 	int	x86_cache_size;  /* in KB - valid for CPUS which support this
 				    call  */
-	int	fdiv_bug;
-	int	f00f_bug;
-	int	coma_bug;
 	unsigned long loops_per_jiffy;
 } ____cacheline_aligned;
@@ -323,7 +314,7 @@ struct thread_struct {
 /* IO permissions. the bitmap could be moved into the GDT, that would make
    switch faster for a limited number of ioperm using tasks. -AK */
 	int	ioperm;
-	u32	io_bitmap[IO_BITMAP_SIZE+1];
+	u32	*io_bitmap_ptr;
 };
 #define INIT_THREAD  { \
......
@@ -15,7 +15,7 @@ extern int printk(const char * fmt, ...)
 typedef struct {
 	volatile unsigned int lock;
-#ifdef CONFIG_DEBUG_SPINLOCK
+#if SPINLOCK_DEBUG
 	unsigned magic;
 #endif
 } spinlock_t;
@@ -39,7 +39,7 @@ typedef struct {
 * We make no fairness assumptions. They have a cost.
 */
-#define spin_is_locked(x)	(*(volatile char *)(&(x)->lock) <= 0)
+#define spin_is_locked(x)	(*(volatile signed char *)(&(x)->lock) <= 0)
 #define spin_unlock_wait(x)	do { barrier(); } while(spin_is_locked(x))
 #define spin_lock_string \
@@ -62,7 +62,7 @@ typedef struct {
 static inline int _raw_spin_trylock(spinlock_t *lock)
 {
-	char oldval;
+	signed char oldval;
 	__asm__ __volatile__(
 		"xchgb %b0,%1"
 		:"=q" (oldval), "=m" (lock->lock)
......
@@ -40,18 +40,9 @@ extern void *__memcpy(void *to, const void *from, size_t len);
 	__ret = __builtin_memcpy((dst),(src),__len); \
 	__ret; })
-#if 0
 #define __HAVE_ARCH_MEMSET
-extern void *__memset(void *mem, int val, size_t len);
-#define memset(dst,val,len) \
-	({ size_t __len = (len); \
-	   void *__ret; \
-	   if (__builtin_constant_p(len) && __len >= 64) \
-		__ret = __memset((dst),(val),__len); \
-	   else \
-		__ret = __builtin_memset((dst),(val),__len); \
-	   __ret; })
-#endif
+#define memset __builtin_memset
 #define __HAVE_ARCH_MEMMOVE
 void * memmove(void * dest,const void *src,size_t count);
......
+#ifndef SUSPEND_H
+#define SUSPEND_H 1
+/* dummy for now */
+#endif
@@ -13,7 +13,10 @@
 #define LOCK_PREFIX ""
 #endif
-#define prepare_to_switch() do {} while(0)
+#define prepare_arch_schedule(prev) do { } while(0)
+#define finish_arch_schedule(prev) do { } while(0)
+#define prepare_arch_switch(rq) do { } while(0)
+#define finish_arch_switch(rq) spin_unlock_irq(&(rq)->lock)
 #define __STR(x) #x
 #define STR(x) __STR(x)
@@ -41,7 +44,7 @@
 	__POP(rax) __POP(r15) __POP(r14) __POP(r13) __POP(r12) __POP(r11) __POP(r10) \
 	__POP(r9) __POP(r8)
-#define switch_to(prev,next) \
+#define switch_to(prev,next,last) \
 	asm volatile(SAVE_CONTEXT \
 		     "movq %%rsp,%[prevrsp]\n\t" \
 		     "movq %[nextrsp],%%rsp\n\t" \
......
@@ -48,6 +48,4 @@ static inline cycles_t get_cycles (void)
 extern unsigned int cpu_khz;
-#define ARCH_HAS_JIFFIES_64
 #endif
@@ -106,15 +106,6 @@ static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long st
 #define TLBSTATE_OK	1
 #define TLBSTATE_LAZY	2
-struct tlb_state
-{
-	struct mm_struct *active_mm;
-	int state;
-	char __cacheline_padding[24];
-};
-extern struct tlb_state cpu_tlbstate[NR_CPUS];
 #endif
 #define flush_tlb_kernel_range(start, end) flush_tlb_all()
......