Commit 1595982c authored by Andi Kleen, committed by Linus Torvalds

[PATCH] Misc core changes for x86-64/2.5.42

And here are all the other x86-64 changes that have accumulated in my tree.

They're various bugfixes and cleanups.

Changes:

 - fix nmi watchdog
 - remove local timer spreading over CPUs - it's useless here and caused many problems
 - New offset.h computation from Kai
 - Lots of changes for the C99 initializer syntax (a short before/after example follows this list)
 - New MTRR driver from Dave & Mats
 - Bugfix: kernel threads don't start with interrupts disabled anymore, which fixes
   various boottime hangs (this was fixed a long time ago, but the bug crept in again
   through the back door)
 - Do %gs reload in context switch lockless
 - Fix device_not_available entry point race
 - New per CPU GDT layout following i386: the layout is not completely
   compatible with i386, which may cause problems with Wine in theory.
   Haven't seen any yet.
 - Support disableapic option
 - driverfs support removed for now because it caused crashes
 - Updates for new signal setup
 - Support for kallsyms
 - Port TLS clone flags/syscalls: unfortunately this made the context switch
   even uglier than it already was.
 - Security fixes for ptrace
 - New in_interrupt()/atomic setup ported from i386
 - New makefiles mostly from Kai
 - Various updates ported from i386
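
As a side note on the C99 initializer item above: the conversion is mechanical, replacing the old gcc "label:" initializer extension with standard C99 designated initializers. A minimal before/after sketch, using the cpuid driver's file_operations from this patch:

/* old gcc-extension syntax, removed by this patch */
static struct file_operations cpuid_fops = {
	owner:	THIS_MODULE,
	llseek:	cpuid_seek,
	read:	cpuid_read,
	open:	cpuid_open,
};

/* equivalent standard C99 designated initializers, used after this patch */
static struct file_operations cpuid_fops = {
	.owner	= THIS_MODULE,
	.llseek	= cpuid_seek,
	.read	= cpuid_read,
	.open	= cpuid_open,
};

Both forms name the members being set and leave the rest zeroed, so the generated code is identical; only the syntax changes.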
parent 1c41006f
......@@ -2,15 +2,14 @@
# Makefile for the linux kernel.
#
O_TARGET := kernel.o
EXTRA_TARGETS := head.o head64.o init_task.o
export-objs := mtrr.o x8664_ksyms.o
export-objs := mtrr.o x8664_ksyms.o pci-gart.o
obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_x86_64.o \
pci-dma.o x8664_ksyms.o i387.o syscall.o vsyscall.o \
setup64.o bluesmoke.o bootflag.o
setup64.o bluesmoke.o bootflag.o e820.o reboot.o
obj-$(CONFIG_MTRR) += mtrr.o
obj-$(CONFIG_X86_MSR) += msr.o
......@@ -18,9 +17,11 @@ obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o
#obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_ACPI) += acpi.o
#obj-$(CONFIG_ACPI_SLEEP) += acpi_wakeup.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o
EXTRA_AFLAGS := -traditional
......
......@@ -30,6 +30,8 @@
#include <asm/mpspec.h>
#include <asm/pgalloc.h>
int disable_apic_timer __initdata;
/* Using APIC to generate smp_local_timer_interrupt? */
int using_apic_timer = 0;
......@@ -598,7 +600,7 @@ static int __init detect_init_APIC (void)
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1)
if (boot_cpu_data.x86 > 6)
break;
goto no_apic;
case X86_VENDOR_INTEL:
......@@ -640,6 +642,8 @@ static int __init detect_init_APIC (void)
if (nmi_watchdog != NMI_NONE)
nmi_watchdog = NMI_LOCAL_APIC;
apic_pm_init1();
printk("Found and enabled local APIC!\n");
return 0;
......@@ -694,59 +698,6 @@ void __init init_apic_mappings(void)
#endif
}
/*
* This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts
* per second. We assume that the caller has already set up the local
* APIC.
*
* The APIC timer is not exactly sync with the external timer chip, it
* closely follows bus clocks.
*/
/*
* The timer chip is already set up at HZ interrupts per second here,
* but we do not accept timer interrupts yet. We only allow the BP
* to calibrate.
*/
static unsigned int __init get_8254_timer_count(void)
{
extern spinlock_t i8253_lock;
unsigned long flags;
unsigned int count;
spin_lock_irqsave(&i8253_lock, flags);
outb_p(0x00, 0x43);
count = inb_p(0x40);
count |= inb_p(0x40) << 8;
spin_unlock_irqrestore(&i8253_lock, flags);
return count;
}
void __init wait_8254_wraparound(void)
{
unsigned int curr_count, prev_count=~0;
int delta;
curr_count = get_8254_timer_count();
do {
prev_count = curr_count;
curr_count = get_8254_timer_count();
delta = curr_count-prev_count;
/*
* This limit for delta seems arbitrary, but it isn't, it's
* slightly above the level of error a buggy Mercury/Neptune
* chipset timer can cause.
*/
} while (delta < 300);
}
/*
* This function sets up the local APIC timer, with a timeout of
* 'clocks' APIC bus clock. During calibration we actually call
......@@ -779,52 +730,36 @@ void __setup_APIC_LVTT(unsigned int clocks)
apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR);
}
void setup_APIC_timer(void * data)
static void setup_APIC_timer(unsigned int clocks)
{
unsigned int clocks = (unsigned long) data, slice, t0, t1;
unsigned long flags;
int delta;
local_save_flags(flags);
local_irq_enable();
/*
* ok, Intel has some smart code in their APIC that knows
* if a CPU was in 'hlt' lowpower mode, and this increases
* its APIC arbitration priority. To avoid the external timer
* IRQ APIC event being in synchron with the APIC clock we
* introduce an interrupt skew to spread out timer events.
*
* The number of slices within a 'big' timeslice is smp_num_cpus+1
*/
slice = clocks / (smp_num_cpus+1);
printk("cpu: %d, clocks: %d, slice: %d\n",
smp_processor_id(), clocks, slice);
/*
* Wait for IRQ0's slice:
*/
wait_8254_wraparound();
local_irq_save(flags);
#if 0
/* For some reasons this doesn't work on Simics, so fake it for now */
if (strstr(boot_cpu_data.x86_model_id, "Screwdriver")) {
__setup_APIC_LVTT(clocks);
return;
}
#endif
t0 = apic_read(APIC_TMICT)*APIC_DIVISOR;
/* Wait till TMCCT gets reloaded from TMICT... */
do {
t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR;
delta = (int)(t0 - t1 - slice*(smp_processor_id()+1));
} while (delta >= 0);
/* Now wait for our slice for real. */
/* wait for irq slice */
{
int c1, c2;
outb_p(0x00, 0x43);
c2 = inb_p(0x40);
c2 |= inb_p(0x40) << 8;
do {
t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR;
delta = (int)(t0 - t1 - slice*(smp_processor_id()+1));
} while (delta < 0);
c1 = c2;
outb_p(0x00, 0x43);
c2 = inb_p(0x40);
c2 |= inb_p(0x40) << 8;
} while (c2 - c1 < 300);
}
__setup_APIC_LVTT(clocks);
printk("CPU%d<T0:%u,T1:%u,D:%d,S:%u,C:%u>\n",
smp_processor_id(), t0, t1, delta, slice, clocks);
local_irq_restore(flags);
}
......@@ -841,16 +776,12 @@ void setup_APIC_timer(void * data)
* APIC irq that way.
*/
#define TICK_COUNT 100000000
int __init calibrate_APIC_clock(void)
{
unsigned long t1 = 0, t2 = 0;
int tt1, tt2;
int apic, apic_start, tsc, tsc_start;
int result;
int i;
const int LOOPS = HZ/10;
printk("calibrating APIC timer ...\n");
/*
* Put whatever arbitrary (but long enough) timeout
* value into the APIC clock, we just want to get the
......@@ -858,61 +789,31 @@ int __init calibrate_APIC_clock(void)
*/
__setup_APIC_LVTT(1000000000);
/*
* The timer chip counts down to zero. Let's wait
* for a wraparound to start exact measurement:
* (the current tick might have been already half done)
*/
wait_8254_wraparound();
/*
* We wrapped around just now. Let's start:
*/
if (cpu_has_tsc)
rdtscll(t1);
tt1 = apic_read(APIC_TMCCT);
/*
* Let's wait LOOPS wraprounds:
*/
for (i = 0; i < LOOPS; i++)
wait_8254_wraparound();
tt2 = apic_read(APIC_TMCCT);
if (cpu_has_tsc)
rdtscll(t2);
/*
* The APIC bus clock counter is 32 bits only, it
* might have overflown, but note that we use signed
* longs, thus no extra care needed.
*
* underflown to be exact, as the timer counts down ;)
*/
result = (tt1-tt2)*APIC_DIVISOR/LOOPS;
printk("t1 = %ld t2 = %ld tt1 = %d tt2 = %d\n", t1, t2, tt1, tt2);
apic_start = apic_read(APIC_TMCCT);
rdtscl(tsc_start);
do {
apic = apic_read(APIC_TMCCT);
rdtscl(tsc);
} while ((tsc - tsc_start) < TICK_COUNT && (apic - apic_start) < TICK_COUNT);
if (cpu_has_tsc)
printk("..... CPU clock speed is %d.%04d MHz.\n",
((int)(t2-t1)/LOOPS)/(1000000/HZ),
((int)(t2-t1)/LOOPS)%(1000000/HZ));
result = (apic_start - apic) * 1000L * cpu_khz / (tsc - tsc_start);
printk("..... host bus clock speed is %d.%04d MHz.\n",
result/(1000000/HZ),
result%(1000000/HZ));
printk("Detected %d.%03d MHz APIC timer.\n",
result / 1000 / 1000, result / 1000 % 1000);
return result;
return result * APIC_DIVISOR / HZ;
}
static unsigned int calibration_result;
void __init setup_APIC_clocks (void)
void __init setup_boot_APIC_clock (void)
{
if (disable_apic_timer) {
printk("Disabling APIC timer\n");
return;
}
printk("Using local APIC timer interrupts.\n");
using_apic_timer = 1;
......@@ -922,12 +823,16 @@ void __init setup_APIC_clocks (void)
/*
* Now set up the timer for real.
*/
setup_APIC_timer((void *)(u64)calibration_result);
setup_APIC_timer(calibration_result);
local_irq_enable();
}
/* and update all other cpus */
smp_call_function(setup_APIC_timer, (void *)(u64)calibration_result, 1, 1);
void __init setup_secondary_APIC_clock(void)
{
local_irq_disable(); /* FIXME: Do we need this? --RR */
setup_APIC_timer(calibration_result);
local_irq_enable();
}
void __init disable_APIC_timer(void)
......@@ -1044,8 +949,6 @@ inline void smp_local_timer_interrupt(struct pt_regs *regs)
* [ if a single-CPU system runs an SMP kernel then we call the local
* interrupt as well. Thus we cannot inline the local irq ... ]
*/
unsigned int apic_timer_irqs [NR_CPUS];
void smp_apic_timer_interrupt(struct pt_regs *regs)
{
int cpu = smp_processor_id();
......@@ -1053,7 +956,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
/*
* the NMI deadlock-detector uses this.
*/
apic_timer_irqs[cpu]++;
add_pda(apic_timer_irqs, 1);
/*
* NOTE! We'd better ACK the irq immediately,
......@@ -1065,12 +968,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
* Besides, if we don't timer interrupts ignore the global
* interrupt lock, which is the WrongThing (tm) to do.
*/
irq_enter(cpu, 0);
irq_enter();
smp_local_timer_interrupt(regs);
irq_exit(cpu, 0);
if (softirq_pending(cpu))
do_softirq();
irq_exit();
}
/*
......@@ -1082,6 +982,7 @@ asmlinkage void smp_spurious_interrupt(void)
static unsigned long last_warning;
static unsigned long skipped;
irq_enter();
/*
* Check if this really is a spurious interrupt and ACK it
* if it is a vectored one. Just in case...
......@@ -1099,6 +1000,7 @@ asmlinkage void smp_spurious_interrupt(void)
} else {
skipped++;
}
irq_exit();
}
/*
......@@ -1109,6 +1011,7 @@ asmlinkage void smp_error_interrupt(void)
{
unsigned int v, v1;
irq_enter();
/* First tickle the hardware, only then report what went on. -- REW */
v = apic_read(APIC_ESR);
apic_write(APIC_ESR, 0);
......@@ -1126,16 +1029,23 @@ asmlinkage void smp_error_interrupt(void)
6: Received illegal vector
7: Illegal register address
*/
printk (KERN_ERR "APIC error on CPU%d: %02x(%02x)\n",
printk (KERN_INFO "APIC error on CPU%d: %02x(%02x)\n",
smp_processor_id(), v , v1);
irq_exit();
}
int disable_apic __initdata;
/*
* This initializes the IO-APIC and APIC hardware if this is
* a UP kernel.
*/
int __init APIC_init_uniprocessor (void)
{
if (disable_apic) {
printk(KERN_INFO "Apic disabled\n");
return -1;
}
if (!smp_found_config && !cpu_has_apic)
return -1;
......@@ -1166,7 +1076,21 @@ int __init APIC_init_uniprocessor (void)
if (!skip_ioapic_setup && nr_ioapics)
setup_IO_APIC();
#endif
setup_APIC_clocks();
setup_boot_APIC_clock();
return 0;
}
static __init int setup_disableapic(char *str)
{
disable_apic = 1;
}
static __init int setup_noapictimer(char *str)
{
disable_apic_timer = 1;
}
__setup("disableapic", setup_disableapic);
__setup("noapictimer", setup_noapictimer);
......@@ -24,15 +24,16 @@ int main(void)
ENTRY(state);
ENTRY(flags);
ENTRY(thread);
ENTRY(pid);
BLANK();
#undef ENTRY
#define ENTRY(entry) DEFINE(threadinfo__ ## entry, offsetof(struct thread_info, entry))
#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct thread_info, entry))
ENTRY(flags);
ENTRY(addr_limit);
ENTRY(preempt_count);
BLANK();
#undef ENTRY
#define ENTRY(entry) DEFINE(pda__ ## entry, offsetof(struct x8664_pda, entry))
#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
ENTRY(kernelstack);
ENTRY(oldrsp);
ENTRY(pcurrent);
......
......@@ -39,7 +39,7 @@ static void hammer_machine_check(struct pt_regs * regs, long error_code)
recover=0;
printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl);
preempt_disable();
for (i=0;i<banks;i++) {
rdmsr(MSR_IA32_MC0_STATUS+i*4,low, high);
if(high&(1<<31)) {
......@@ -64,6 +64,7 @@ static void hammer_machine_check(struct pt_regs * regs, long error_code)
wmb();
}
}
preempt_enable();
if(recover&2)
panic("CPU context corrupt");
......@@ -110,6 +111,7 @@ static void mce_checkregs (void *info)
BUG_ON (*cpu != smp_processor_id());
preempt_disable();
for (i=0; i<banks; i++) {
rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
......@@ -124,6 +126,7 @@ static void mce_checkregs (void *info)
wmb();
}
}
preempt_enable();
}
......
......@@ -146,10 +146,10 @@ static int cpuid_open(struct inode *inode, struct file *file)
* File operations we support
*/
static struct file_operations cpuid_fops = {
owner: THIS_MODULE,
llseek: cpuid_seek,
read: cpuid_read,
open: cpuid_open,
.owner = THIS_MODULE,
.llseek = cpuid_seek,
.read = cpuid_read,
.open = cpuid_open,
};
int __init cpuid_init(void)
......
......@@ -47,10 +47,10 @@ static void early_vga_write(struct console *con, const char *str, unsigned n)
}
static struct console early_vga_console = {
name: "earlyvga",
write: early_vga_write,
flags: CON_PRINTBUFFER,
index: -1,
.name = "earlyvga",
.write = early_vga_write,
.flags = CON_PRINTBUFFER,
.index = -1,
};
/* Serial functions loosely based on a similar package from Klaus P. Gerlicher */
......@@ -138,10 +138,10 @@ static __init void early_serial_init(char *opt)
}
static struct console early_serial_console = {
name: "earlyser",
write: early_serial_write,
flags: CON_PRINTBUFFER,
index: -1,
.name = "earlyser",
.write = early_serial_write,
.flags = CON_PRINTBUFFER,
.index = -1,
};
/* Direct interface for emergencies */
......@@ -181,6 +181,9 @@ int __init setup_early_printk(char *opt)
if (!strncmp(buf, "serial", 6)) {
early_serial_init(buf + 6);
early_console = &early_serial_console;
} else if (!strncmp(buf, "ttyS", 4)) {
early_serial_init(buf);
early_console = &early_serial_console;
} else if (!strncmp(buf, "vga", 3)) {
early_console = &early_vga_console;
} else {
......
......@@ -84,7 +84,7 @@
xorq %rax, %rax
pushq %rax /* ss */
pushq %rax /* rsp */
pushq %rax /* eflags */
pushq $(1<<9) /* eflags - interrupts on */
pushq $__KERNEL_CS /* cs */
pushq \child_rip /* rip */
pushq %rax /* orig rax */
......@@ -236,21 +236,17 @@ badsys:
* Has correct top of stack, but partial stack frame.
*/
ENTRY(int_ret_from_sys_call)
testl $3,CS-ARGOFFSET(%rsp) # kernel syscall?
je int_restore_args
cli
testl $3,CS-ARGOFFSET(%rsp)
je retint_restore_args
movl $_TIF_ALLWORK_MASK,%edi
/* edi: mask to check */
int_with_check:
GET_THREAD_INFO(%rcx)
cli
movl threadinfo_flags(%rcx),%edx
andl %edi,%edx
jnz int_careful
int_restore_swapgs:
swapgs
int_restore_args:
RESTORE_ARGS 0,8,0
iretq
jmp retint_swapgs
/* Either reschedule or signal or syscall exit tracking needed. */
/* First do a reschedule test. */
......@@ -364,15 +360,11 @@ ENTRY(stub_rt_sigreturn)
.macro interrupt func
cld
SAVE_ARGS
#ifdef CONFIG_PREEMPT
GET_THREAD_INFO(%rdx)
incl threadinfo_preempt_count(%rdx)
#endif
leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
testl $3,CS(%rdi)
je 1f
swapgs
1: addl $1,PDAREF(pda_irqcount) # XXX: should be merged with irq.c irqcount
1: addl $1,PDAREF(pda_irqcount) # RED-PEN should check preempt count
movq PDAREF(pda_irqstackptr),%rax
cmoveq %rax,%rsp
pushq %rdi # save old stack
......@@ -389,9 +381,6 @@ ret_from_intr:
leaq ARGOFFSET(%rdi),%rsp
exit_intr:
GET_THREAD_INFO(%rcx)
#ifdef CONFIG_PREEMPT
decl threadinfo_preempt_count(%rcx)
#endif
testl $3,CS-ARGOFFSET(%rsp)
je retint_kernel
......@@ -407,11 +396,24 @@ retint_check:
andl %edi,%edx
jnz retint_careful
retint_swapgs:
cli
swapgs
retint_restore_args:
cli
RESTORE_ARGS 0,8,0
iret_label:
iretq
.section __ex_table,"a"
.quad iret_label,bad_iret
.previous
.section .fixup,"ax"
/* force a signal here? this matches i386 behaviour */
bad_iret:
movq $-9999,%rdi /* better code? */
jmp do_exit
.previous
/* edi: workmask, edx: work */
retint_careful:
bt $TIF_NEED_RESCHED,%edx
......@@ -448,9 +450,8 @@ retint_kernel:
jnz retint_restore_args
bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
jnc retint_restore_args
movl PDAREF(pda___local_bh_count),%eax
addl PDAREF(pda___local_irq_count),%eax
jnz retint_restore_args
bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
jc retint_restore_args
movl $PREEMPT_ACTIVE,threadinfo_preempt_count(%rcx)
sti
call schedule
......@@ -513,11 +514,6 @@ ENTRY(spurious_interrupt)
*/
ALIGN
error_entry:
testl $3,24(%rsp)
je error_kernelspace
swapgs
error_kernelspace:
sti
/* rdi slot contains rax, oldrax contains error code */
pushq %rsi
movq 8(%rsp),%rsi /* load rax */
......@@ -530,17 +526,25 @@ error_kernelspace:
pushq %r11
cld
SAVE_REST
testl $3,CS(%rsp)
je error_kernelspace
error_swapgs:
xorl %ebx,%ebx
swapgs
error_sti:
sti
movq %rdi,RDI(%rsp)
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi /* get error code */
movq $-1,ORIG_RAX(%rsp)
call *%rax
error_exit:
movl %ebx,%eax
RESTORE_REST
cli
GET_THREAD_INFO(%rcx)
testl $3,CS-ARGOFFSET(%rsp)
je retint_kernel
testl %eax,%eax
jne retint_kernel
movl threadinfo_flags(%rcx),%edx
movl $_TIF_WORK_MASK,%edi
andl %edi,%edx
......@@ -549,6 +553,39 @@ error_exit:
RESTORE_ARGS 0,8,0
iretq
error_kernelspace:
/* There are two places in the kernel that can potentially fault with
usergs. Handle them here. */
cmpq $iret_label,RIP(%rsp)
je error_swapgs
cmpq $gs_change,RIP(%rsp)
je error_swapgs
movl $1,%ebx
jmp error_sti
/* Reload gs selector with exception handling */
/* edi: new selector */
ENTRY(load_gs_index)
pushf
cli
swapgs
gs_change:
movl %edi,%gs
2: swapgs
popf
ret
.section __ex_table,"a"
.align 8
.quad gs_change,bad_gs
.previous
.section .fixup,"ax"
bad_gs:
xorl %eax,%eax
movl %eax,%gs
jmp 2b
.previous
/*
* Create a kernel thread.
*
......@@ -564,7 +601,7 @@ ENTRY(kernel_thread)
# rdi: flags, rsi: usp, rdx: will be &pt_regs
movq %rdx,%rdi
orq kernel_thread_flags(%rip), %rdi
orq kernel_thread_flags(%rip),%rdi
movq $-1, %rsi
movq %rsp, %rdx
......@@ -573,8 +610,9 @@ ENTRY(kernel_thread)
xorl %edi,%edi
cmpq $-1000,%rax
cmovb %rdi,%rax
movq %rax,RAX(%rsp)
jnb 1f
movl tsk_pid(%rax),%eax
1: movq %rax,RAX(%rsp)
/*
* It isn't worth to check for reschedule here,
......@@ -648,18 +686,19 @@ ENTRY(simd_coprocessor_error)
zeroentry do_simd_coprocessor_error
ENTRY(device_not_available)
testl $3,8(%rsp)
pushq $-1 #error code
SAVE_ALL
movl $1,%ebx
testl $3,CS(%rsp)
je 1f
xorl %ebx,%ebx
swapgs
1: pushq $-1 #error code
SAVE_ALL
movq %cr0,%rax
1: movq %cr0,%rax
leaq math_state_restore(%rip),%rcx
leaq math_emulate(%rip),%rbx
leaq math_emulate(%rip),%rdx
testl $0x4,%eax
cmoveq %rcx,%rbx
preempt_stop
call *%rbx
cmoveq %rcx,%rdx
call *%rdx
jmp error_exit
ENTRY(debug)
......
......@@ -159,7 +159,7 @@ reach_long64:
* addresses where we're currently running on. We have to do that here
* because in 32bit we couldn't load a 64bit linear address.
*/
lgdt pGDT64
lgdt cpu_gdt_descr
/*
* Setup up a dummy PDA. this is just for some early bootup code
......@@ -276,7 +276,7 @@ temp_boot_pmds:
.org 0x5000
ENTRY(level2_kernel_pgt)
/* 40MB kernel mapping. The kernel code cannot be bigger than that.
When you change this change KERNEL_TEXT_SIZE in pgtable.h too. */
When you change this change KERNEL_TEXT_SIZE in page.h too. */
/* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
.quad 0x0000000000000183
.quad 0x0000000000200183
......@@ -320,16 +320,18 @@ ENTRY(level3_physmem_pgt)
.org 0xb000
.data
.globl gdt
.word 0
.align 16
.word 0
pGDT64:
.word gdt_end-gdt_table
.globl cpu_gdt_descr
cpu_gdt_descr:
.word gdt_end-cpu_gdt_table
gdt:
.quad gdt_table
.quad cpu_gdt_table
#ifdef CONFIG_SMP
.rept NR_CPUS-1
.word 0
.quad 0
.endr
#endif
.align 64 /* cacheline aligned */
ENTRY(gdt_table32)
......@@ -344,8 +346,12 @@ gdt32_end:
*/
.align 64 /* cacheline aligned, keep this synchronized with asm/desc.h */
ENTRY(gdt_table)
.quad 0x0000000000000000 /* This one is magic */
/* The TLS descriptors are currently at a different place compared to i386.
Hopefully nobody expects them at a fixed place (Wine?) */
ENTRY(cpu_gdt_table)
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x0000000000000000 /* unused */
.quad 0x00af9a000000ffff /* __KERNEL_CS */
.quad 0x00cf92000000ffff /* __KERNEL_DS */
......@@ -358,15 +364,20 @@ ENTRY(gdt_table)
.word 0x00CF # granularity = 4096, 386
# (+5th nibble of limit)
/* __KERNEL32_CS */
.globl tss_start
tss_start:
.rept NR_CPUS
.quad 0,0,0,0,0,0,0,0 /* TSS/LDT/per cpu entries. filled in later */
.endr
.quad 0,0 /* TSS */
.quad 0 /* LDT */
.quad 0,0,0 /* three TLS descriptors */
.quad 0,0 /* pad to cache line boundary */
gdt_end:
.globl gdt_end
/* GDTs of other CPUs */
#ifdef CONFIG_SMP
.rept NR_CPUS-1
.quad 0,0,0,0,0,0,0,0,0,0,0
.endr
#endif
.align 64
ENTRY(idt_table)
.rept 256
......
......@@ -71,6 +71,7 @@ static void __init setup_boot_cpu_data(void)
}
extern void start_kernel(void), pda_init(int), setup_early_printk(char *);
extern int disable_apic;
void __init x86_64_start_kernel(char * real_mode_data)
{
......@@ -82,6 +83,10 @@ void __init x86_64_start_kernel(char * real_mode_data)
s = strstr(saved_command_line, "earlyprintk=");
if (s != NULL)
setup_early_printk(s+12);
#ifdef CONFIG_X86_IO_APIC
if (strstr(saved_command_line, "disableapic"))
disable_apic = 1;
#endif
setup_boot_cpu_data();
start_kernel();
}
#include <linux/linkage.h>
#include <linux/config.h>
#include <linux/ptrace.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/sched.h>
......@@ -120,7 +119,8 @@ static void end_8259A_irq (unsigned int irq)
BUG();
}
if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)) &&
irq_desc[irq].action)
enable_8259A_irq(irq);
}
......@@ -320,18 +320,6 @@ void mask_and_ack_8259A(unsigned int irq)
}
}
static struct device device_i8259A = {
name: "i8259A",
bus_id: "0020",
};
static int __init init_8259A_devicefs(void)
{
return register_sys_device(&device_i8259A);
}
__initcall(init_8259A_devicefs);
void __init init_8259A(int auto_eoi)
{
unsigned long flags;
......
......@@ -10,7 +10,7 @@
static struct fs_struct init_fs = INIT_FS;
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
struct mm_struct init_mm = INIT_MM(init_mm);
/*
......
......@@ -56,17 +56,21 @@ static void set_bitmap(unsigned long *bitmap, short base, short extent, int new_
asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on)
{
struct thread_struct * t = &current->thread;
struct tss_struct * tss = init_tss + smp_processor_id();
struct tss_struct * tss;
int ret = 0;
if ((from + num <= from) || (from + num > IO_BITMAP_SIZE*32))
return -EINVAL;
if (turn_on && !capable(CAP_SYS_RAWIO))
return -EPERM;
tss = init_tss + get_cpu();
if (!t->io_bitmap_ptr) {
t->io_bitmap_ptr = kmalloc((IO_BITMAP_SIZE+1)*4, GFP_KERNEL);
if (!t->io_bitmap_ptr)
return -ENOMEM;
if (!t->io_bitmap_ptr) {
ret = -ENOMEM;
goto out;
}
memset(t->io_bitmap_ptr,0xff,(IO_BITMAP_SIZE+1)*4);
tss->io_map_base = IO_BITMAP_OFFSET;
}
......@@ -77,7 +81,9 @@ asmlinkage int sys_ioperm(unsigned long from, unsigned long num, int turn_on)
set_bitmap((unsigned long *) t->io_bitmap_ptr, from, num, !turn_on);
set_bitmap((unsigned long *) tss->io_bitmap, from, num, !turn_on);
return 0;
out:
put_cpu();
return ret;
}
/*
......
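
The sys_ioperm() hunk above replaces the bare smp_processor_id() lookup of the per-CPU TSS with a get_cpu()/put_cpu() pair, so the task cannot be preempted and migrated to another CPU while it edits that CPU's I/O bitmap, and the error paths now drop the pin through the new "out:" label. A minimal sketch of that pattern; the function, the "allowed" flag and the counters array are hypothetical and not part of this patch:

#include <linux/smp.h>		/* get_cpu(), put_cpu() */
#include <linux/threads.h>	/* NR_CPUS */
#include <linux/errno.h>

static int counters[NR_CPUS];	/* hypothetical per-CPU state */

static int bump_this_cpu(int allowed)
{
	int ret = 0;
	int cpu = get_cpu();	/* disables preemption, returns current CPU id */

	if (!allowed) {
		ret = -EPERM;
		goto out;	/* error paths must still drop the pin */
	}
	counters[cpu]++;	/* safe: we cannot migrate off this CPU here */
out:
	put_cpu();		/* re-enables preemption */
	return ret;
}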
......@@ -18,7 +18,6 @@
*/
#include <linux/config.h>
#include <linux/ptrace.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/sched.h>
......@@ -138,7 +137,7 @@ int show_interrupts(struct seq_file *p, void *v)
struct irqaction * action;
seq_printf(p, " ");
for (j=0; j<smp_num_cpus; j++)
for_each_cpu(j)
seq_printf(p, "CPU%d ",j);
seq_putc(p, '\n');
......@@ -150,9 +149,9 @@ int show_interrupts(struct seq_file *p, void *v)
#ifndef CONFIG_SMP
seq_printf(p, "%10u ", kstat_irqs(i));
#else
for (j = 0; j < smp_num_cpus; j++)
for_each_cpu(j)
seq_printf(p, "%10u ",
kstat.irqs[cpu_logical_map(j)][i]);
kstat.irqs[j][i]);
#endif
seq_printf(p, " %14s", irq_desc[i].handler->typename);
seq_printf(p, " %s", action->name);
......@@ -162,13 +161,13 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
}
seq_printf(p, "NMI: ");
for (j = 0; j < smp_num_cpus; j++)
seq_printf(p, "%10u ", cpu_pda[cpu_logical_map(j)].__nmi_count);
for_each_cpu(j)
seq_printf(p, "%10u ", cpu_pda[j].__nmi_count);
seq_putc(p, '\n');
#if CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
for (j = 0; j < smp_num_cpus; j++)
seq_printf(p, "%10u ", apic_timer_irqs[cpu_logical_map(j)]);
for_each_cpu(j)
seq_printf(p, "%10u ", cpu_pda[j].apic_timer_irqs);
seq_putc(p, '\n');
#endif
seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
......@@ -197,11 +196,7 @@ inline void synchronize_irq(unsigned int irq)
*/
int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction * action)
{
int status;
irq_enter(0, irq);
status = 1; /* Force the "do bottom halves" bit */
int status = 1; /* Force the "do bottom halves" bit */
if (!(action->flags & SA_INTERRUPT))
local_irq_enable();
......@@ -215,8 +210,6 @@ int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction *
add_interrupt_randomness(irq);
local_irq_disable();
irq_exit(0, irq);
return status;
}
......@@ -236,7 +229,7 @@ int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, struct irqaction *
* Unlike disable_irq(), this function does not ensure existing
* instances of the IRQ handler have completed before returning.
*
* This function may be called from IRQ context.
* This function must not be called from IRQ context.
*/
inline void disable_irq_nosync(unsigned int irq)
......@@ -334,6 +327,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
if (irq > 256) BUG();
irq_enter();
kstat.irqs[cpu][irq]++;
spin_lock(&desc->lock);
desc->handler->ack(irq);
......@@ -349,7 +343,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
* use the action we have.
*/
action = NULL;
if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) {
if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
action = desc->action;
status &= ~IRQ_PENDING; /* we commit to handling */
status |= IRQ_INPROGRESS; /* we are handling it */
......@@ -362,7 +356,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
a different instance of this same irq, the other processor
will take care of it.
*/
if (!action)
if (unlikely(!action))
goto out;
/*
......@@ -380,7 +374,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
handle_IRQ_event(irq, regs, action);
spin_lock(&desc->lock);
if (!(desc->status & IRQ_PENDING))
if (unlikely(!(desc->status & IRQ_PENDING)))
break;
desc->status &= ~IRQ_PENDING;
}
......@@ -394,8 +388,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
desc->handler->end(irq);
spin_unlock(&desc->lock);
if (softirq_pending(cpu))
do_softirq();
irq_exit();
return 1;
}
......@@ -459,7 +452,7 @@ int request_irq(unsigned int irq,
return -EINVAL;
action = (struct irqaction *)
kmalloc(sizeof(struct irqaction), GFP_KERNEL);
kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
if (!action)
return -ENOMEM;
......@@ -522,13 +515,7 @@ void free_irq(unsigned int irq, void *dev_id)
}
spin_unlock_irqrestore(&desc->lock,flags);
#ifdef CONFIG_SMP
/* Wait to make sure it's not being used on another CPU */
while (desc->status & IRQ_INPROGRESS) {
barrier();
cpu_relax();
}
#endif
synchronize_irq(irq);
kfree(action);
return;
}
......@@ -580,7 +567,7 @@ unsigned long probe_irq_on(void)
/* Wait for longstanding interrupts to trigger. */
for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
/* about 20ms delay */ synchronize_irq();
/* about 20ms delay */ barrier();
/*
* enable any unassigned irqs
......@@ -603,7 +590,7 @@ unsigned long probe_irq_on(void)
* Wait for spurious interrupts to trigger
*/
for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
/* about 100ms delay */ synchronize_irq();
/* about 100ms delay */ barrier();
/*
* Now filter out any obviously spurious interrupts
......
......@@ -22,32 +22,13 @@
#include <asm/ldt.h>
#include <asm/desc.h>
void load_gs_index(unsigned gs)
{
int access;
struct task_struct *me = current;
if (me->mm)
read_lock(&me->mm->context.ldtlock);
asm volatile("pushf\n\t"
"cli\n\t"
"swapgs\n\t"
"lar %1,%0\n\t"
"jnz 1f\n\t"
"movl %1,%%eax\n\t"
"movl %%eax,%%gs\n\t"
"jmp 2f\n\t"
"1: movl %2,%%gs\n\t"
"2: swapgs\n\t"
"popf" : "=g" (access) : "g" (gs), "r" (0) : "rax");
if (me->mm)
read_unlock(&me->mm->context.ldtlock);
}
extern void load_gs_index(unsigned gs);
#ifdef CONFIG_SMP /* avoids "defined but not used" warning */
static void flush_ldt(void *mm)
static void flush_ldt(void *null)
{
if (current->mm)
load_LDT(&current->mm->context);
if (current->active_mm)
load_LDT(&current->active_mm->context);
}
#endif
......@@ -75,15 +56,18 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
wmb();
pc->ldt = newldt;
wmb();
pc->size = mincount;
wmb();
if (reload) {
load_LDT(pc);
#ifdef CONFIG_SMP
preempt_disable();
if (current->mm->cpu_vm_mask != (1<<smp_processor_id()))
smp_call_function(flush_ldt, 0, 1, 1);
preempt_enable();
#endif
}
wmb();
if (oldsize) {
if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(oldldt);
......@@ -96,11 +80,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
{
int err = alloc_ldt(new, old->size, 0);
if (err < 0) {
printk(KERN_WARNING "ldt allocation failed\n");
new->size = 0;
if (err < 0)
return err;
}
memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
return 0;
}
......@@ -187,7 +168,7 @@ static int write_ldt(void * ptr, unsigned long bytecount, int oldmode)
struct mm_struct * mm = me->mm;
__u32 entry_1, entry_2, *lp;
int error;
struct modify_ldt_ldt_s ldt_info;
struct user_desc ldt_info;
error = -EINVAL;
......@@ -223,34 +204,17 @@ static int write_ldt(void * ptr, unsigned long bytecount, int oldmode)
/* Allow LDTs to be cleared by the user. */
if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
if (oldmode ||
(ldt_info.contents == 0 &&
ldt_info.read_exec_only == 1 &&
ldt_info.seg_32bit == 0 &&
ldt_info.limit_in_pages == 0 &&
ldt_info.seg_not_present == 1 &&
ldt_info.useable == 0 &&
ldt_info.lm == 0)) {
if (oldmode || LDT_empty(&ldt_info)) {
entry_1 = 0;
entry_2 = 0;
goto install;
}
}
entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
(ldt_info.limit & 0x0ffff);
entry_2 = (ldt_info.base_addr & 0xff000000) |
((ldt_info.base_addr & 0x00ff0000) >> 16) |
(ldt_info.limit & 0xf0000) |
((ldt_info.read_exec_only ^ 1) << 9) |
(ldt_info.contents << 10) |
((ldt_info.seg_not_present ^ 1) << 15) |
(ldt_info.seg_32bit << 22) |
(ldt_info.limit_in_pages << 23) |
(ldt_info.lm << 21) |
0x7000;
if (!oldmode)
entry_2 |= (ldt_info.useable << 20);
entry_1 = LDT_entry_a(&ldt_info);
entry_2 = LDT_entry_b(&ldt_info);
if (oldmode)
entry_2 &= ~(1 << 20);
/* Install the new entry ... */
install:
......
......@@ -247,11 +247,11 @@ static int msr_open(struct inode *inode, struct file *file)
* File operations we support
*/
static struct file_operations msr_fops = {
owner: THIS_MODULE,
llseek: msr_seek,
read: msr_read,
write: msr_write,
open: msr_open,
.owner = THIS_MODULE,
.llseek = msr_seek,
.read = msr_read,
.write = msr_write,
.open = msr_open,
};
int __init msr_init(void)
......
......@@ -25,6 +25,9 @@
v2.01 June 2002 Dave Jones <davej@suse.de>
Removal of redundant abstraction layer.
64-bit fixes.
v2.02 July 2002 Dave Jones <davej@suse.de>
Fix gentry inconsistencies between kernel/userspace.
More casts to clean up warnings.
*/
#include <linux/types.h>
......@@ -50,6 +53,7 @@
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/agp_backend.h>
#include <asm/uaccess.h>
#include <asm/io.h>
......@@ -64,7 +68,7 @@
#include <asm/hardirq.h>
#include <linux/irq.h>
#define MTRR_VERSION "2.01 (20020605)"
#define MTRR_VERSION "2.02 (20020716)"
#define TRUE 1
#define FALSE 0
......@@ -106,7 +110,7 @@ static DECLARE_MUTEX (mtrr_lock);
struct set_mtrr_context {
u32 deftype_lo;
u32 deftype_hi;
u64 flags;
unsigned long flags;
u64 cr4val;
};
......@@ -117,7 +121,7 @@ static void set_mtrr_prepare (struct set_mtrr_context *ctxt)
u64 cr0;
/* Disable interrupts locally */
local_save_flags(ctxt->flags);
local_irq_save(ctxt->flags);
local_irq_disable();
/* Save value of CR4 and clear Page Global Enable (bit 7) */
......@@ -221,6 +225,8 @@ static void set_mtrr_up (unsigned int reg, u64 base,
u32 size, mtrr_type type, int do_safe)
{
struct set_mtrr_context ctxt;
u64 base64;
u64 size64;
if (do_safe)
set_mtrr_prepare (&ctxt);
......@@ -230,10 +236,12 @@ static void set_mtrr_up (unsigned int reg, u64 base,
relevant mask register to disable a range. */
wrmsr (MSR_MTRRphysMask(reg), 0, 0);
} else {
wrmsr (MSR_MTRRphysBase(reg), base << PAGE_SHIFT | type,
(base & size_and_mask) >> (32 - PAGE_SHIFT));
wrmsr (MSR_MTRRphysMask(reg), (-size-1) << PAGE_SHIFT | 0x800,
((-size-1) & size_and_mask) >> (32 - PAGE_SHIFT));
base64 = (base << PAGE_SHIFT) & size_and_mask;
wrmsr (MSR_MTRRphysBase(reg), base64 | type, base64 >> 32);
size64 = ~((size << PAGE_SHIFT) - 1);
size64 = size64 & size_and_mask;
wrmsr (MSR_MTRRphysMask(reg), (u32) (size64 | 0x800), (u32) (size64 >> 32));
}
if (do_safe)
set_mtrr_done (&ctxt);
......@@ -267,15 +275,15 @@ static int __init set_mtrr_var_range_testing (unsigned int index,
int changed = FALSE;
rdmsr (MSR_MTRRphysBase(index), lo, hi);
if ((vr->base_lo & 0xfffff0ff) != (lo & 0xfffff0ff)
|| (vr->base_hi & 0x000fffff) != (hi & 0x000fffff)) {
if ((vr->base_lo & 0xfffff0ff) != (lo & 0xfffff0ff) ||
(vr->base_hi & 0x000fffff) != (hi & 0x000fffff)) {
wrmsr (MSR_MTRRphysBase(index), vr->base_lo, vr->base_hi);
changed = TRUE;
}
rdmsr (MSR_MTRRphysMask(index), lo, hi);
if ((vr->mask_lo & 0xfffff800) != (lo & 0xfffff800)
|| (vr->mask_hi & 0x000fffff) != (hi & 0x000fffff)) {
if ((vr->mask_lo & 0xfffff800) != (lo & 0xfffff800) ||
(vr->mask_hi & 0x000fffff) != (hi & 0x000fffff)) {
wrmsr (MSR_MTRRphysMask(index), vr->mask_lo, vr->mask_hi);
changed = TRUE;
}
......@@ -457,7 +465,7 @@ static void set_mtrr_smp (unsigned int reg, u64 base, u32 size, mtrr_type type)
data.smp_type = type;
wait_barrier_execute = TRUE;
wait_barrier_cache_enable = TRUE;
atomic_set (&undone_count, smp_num_cpus - 1);
atomic_set (&undone_count, num_online_cpus() - 1);
/* Start the ball rolling on other CPUs */
if (smp_call_function (ipi_handler, &data, 1, 0) != 0)
......@@ -471,7 +479,7 @@ static void set_mtrr_smp (unsigned int reg, u64 base, u32 size, mtrr_type type)
barrier ();
/* Set up for completion wait and then release other CPUs to change MTRRs */
atomic_set (&undone_count, smp_num_cpus - 1);
atomic_set (&undone_count, num_online_cpus() - 1);
wait_barrier_execute = FALSE;
set_mtrr_up (reg, base, size, type, FALSE);
......@@ -596,11 +604,22 @@ int mtrr_add_page (u64 base, u32 size, unsigned int type, char increment)
if (base + size < 0x100) {
printk (KERN_WARNING
"mtrr: cannot set region below 1 MiB (0x%lx000,0x%x000)\n",
"mtrr: cannot set region below 1 MiB (0x%Lx000,0x%x000)\n",
base, size);
return -EINVAL;
}
#if defined(__x86_64__) && defined(CONFIG_AGP)
/* {
agp_kern_info info;
if (type != MTRR_TYPE_UNCACHABLE && agp_copy_info(&info) >= 0 &&
base<<PAGE_SHIFT >= info.aper_base &&
(base<<PAGE_SHIFT)+(size<<PAGE_SHIFT) >=
info.aper_base+info.aper_size*1024*1024)
printk(KERN_INFO "%s[%d] setting conflicting mtrr into agp aperture\n",current->comm,current->pid);
}*/
#endif
/* Check upper bits of base and last are equal and lower bits are 0
for base and 1 for last */
last = base + size - 1;
......@@ -609,7 +628,7 @@ int mtrr_add_page (u64 base, u32 size, unsigned int type, char increment)
if (lbase != last) {
printk (KERN_WARNING
"mtrr: base(0x%lx000) is not aligned on a size(0x%x000) boundary\n",
"mtrr: base(0x%Lx000) is not aligned on a size(0x%x000) boundary\n",
base, size);
return -EINVAL;
}
......@@ -626,8 +645,14 @@ int mtrr_add_page (u64 base, u32 size, unsigned int type, char increment)
return -ENOSYS;
}
if (base & size_or_mask || size & size_or_mask) {
printk ("mtrr: base or size exceeds the MTRR width\n");
if (base & (size_or_mask>>PAGE_SHIFT)) {
printk (KERN_WARNING "mtrr: base(%lx) exceeds the MTRR width(%lx)\n",
base, (size_or_mask>>PAGE_SHIFT));
return -EINVAL;
}
if (size & (size_or_mask>>PAGE_SHIFT)) {
printk (KERN_WARNING "mtrr: size exceeds the MTRR width\n");
return -EINVAL;
}
......@@ -646,8 +671,8 @@ int mtrr_add_page (u64 base, u32 size, unsigned int type, char increment)
if ((base < lbase) || (base + size > lbase + lsize)) {
up (&mtrr_lock);
printk (KERN_WARNING
"mtrr: 0x%lx000,0x%x000 overlaps existing"
" 0x%lx000,0x%x000\n", base, size, lbase, lsize);
"mtrr: 0x%Lx000,0x%x000 overlaps existing"
" 0x%Lx000,0x%x000\n", base, size, lbase, lsize);
return -EINVAL;
}
/* New region is enclosed by an existing region */
......@@ -656,7 +681,7 @@ int mtrr_add_page (u64 base, u32 size, unsigned int type, char increment)
continue;
up (&mtrr_lock);
printk
("mtrr: type mismatch for %lx000,%x000 old: %s new: %s\n",
("mtrr: type mismatch for %Lx000,%x000 old: %s new: %s\n",
base, size,
attrib_to_str (ltype),
attrib_to_str (type));
......@@ -720,7 +745,7 @@ int mtrr_add (u64 base, u32 size, unsigned int type, char increment)
{
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
printk ("mtrr: size and base must be multiples of 4 kiB\n");
printk ("mtrr: size: 0x%x base: 0x%lx\n", size, base);
printk ("mtrr: size: 0x%x base: 0x%Lx\n", size, base);
return -EINVAL;
}
return mtrr_add_page (base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
......@@ -763,7 +788,7 @@ int mtrr_del_page (int reg, u64 base, u32 size)
}
if (reg < 0) {
up (&mtrr_lock);
printk ("mtrr: no MTRR for %lx000,%x000 found\n", base, size);
printk ("mtrr: no MTRR for %Lx000,%x000 found\n", base, size);
return -EINVAL;
}
}
......@@ -814,7 +839,7 @@ int mtrr_del (int reg, u64 base, u32 size)
{
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
printk ("mtrr: size and base must be multiples of 4 kiB\n");
printk ("mtrr: size: 0x%x base: 0x%lx\n", size, base);
printk ("mtrr: size: 0x%x base: 0x%Lx\n", size, base);
return -EINVAL;
}
return mtrr_del_page (reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
......@@ -844,7 +869,7 @@ static int mtrr_file_add (u64 base, u32 size, unsigned int type,
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
printk
("mtrr: size and base must be multiples of 4 kiB\n");
printk ("mtrr: size: 0x%x base: 0x%lx\n", size, base);
printk ("mtrr: size: 0x%x base: 0x%Lx\n", size, base);
return -EINVAL;
}
base >>= PAGE_SHIFT;
......@@ -869,7 +894,7 @@ static int mtrr_file_del (u64 base, u32 size,
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
printk
("mtrr: size and base must be multiples of 4 kiB\n");
printk ("mtrr: size: 0x%x base: 0x%lx\n", size, base);
printk ("mtrr: size: 0x%x base: 0x%Lx\n", size, base);
return -EINVAL;
}
base >>= PAGE_SHIFT;
......@@ -961,7 +986,7 @@ static ssize_t mtrr_write (struct file *file, const char *buf,
if ((base & 0xfff) || (size & 0xfff)) {
printk ("mtrr: size and base must be multiples of 4 kiB\n");
printk ("mtrr: size: 0x%x base: 0x%lx\n", size, base);
printk ("mtrr: size: 0x%x base: 0x%Lx\n", size, base);
return -EINVAL;
}
......@@ -1007,8 +1032,7 @@ static int mtrr_ioctl (struct inode *inode, struct file *file,
return -EPERM;
if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
return -EFAULT;
err =
mtrr_file_add (sentry.base, sentry.size, sentry.type,
err = mtrr_file_add (sentry.base, sentry.size, sentry.type,
file, 0);
if (err < 0)
return err;
......@@ -1049,7 +1073,7 @@ static int mtrr_ioctl (struct inode *inode, struct file *file,
return -EFAULT;
if (gentry.regnum >= get_num_var_ranges ())
return -EINVAL;
get_mtrr (gentry.regnum, &gentry.base, &gentry.size, &type);
get_mtrr (gentry.regnum, (u64*) &gentry.base, &gentry.size, &type);
/* Hide entries that go above 4GB */
if (gentry.base + gentry.size > 0x100000
......@@ -1070,9 +1094,7 @@ static int mtrr_ioctl (struct inode *inode, struct file *file,
return -EPERM;
if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
return -EFAULT;
err =
mtrr_file_add (sentry.base, sentry.size, sentry.type,
file, 1);
err = mtrr_file_add (sentry.base, sentry.size, sentry.type, file, 1);
if (err < 0)
return err;
break;
......@@ -1112,7 +1134,7 @@ static int mtrr_ioctl (struct inode *inode, struct file *file,
return -EFAULT;
if (gentry.regnum >= get_num_var_ranges ())
return -EINVAL;
get_mtrr (gentry.regnum, &gentry.base, &gentry.size, &type);
get_mtrr (gentry.regnum, (u64*) &gentry.base, &gentry.size, &type);
gentry.type = type;
if (copy_to_user ((void *) arg, &gentry, sizeof gentry))
......@@ -1131,6 +1153,7 @@ static int mtrr_close (struct inode *ino, struct file *file)
if (fcount == NULL)
return 0;
lock_kernel ();
max = get_num_var_ranges ();
for (i = 0; i < max; ++i) {
while (fcount[i] > 0) {
......@@ -1139,6 +1162,7 @@ static int mtrr_close (struct inode *ino, struct file *file)
--fcount[i];
}
}
unlock_kernel ();
kfree (fcount);
file->private_data = NULL;
return 0;
......@@ -1146,11 +1170,11 @@ static int mtrr_close (struct inode *ino, struct file *file)
static struct file_operations mtrr_fops = {
owner: THIS_MODULE,
read: mtrr_read,
write: mtrr_write,
ioctl: mtrr_ioctl,
release:mtrr_close,
.owner = THIS_MODULE,
.read = mtrr_read,
.write = mtrr_write,
.ioctl = mtrr_ioctl,
.release = mtrr_close,
};
#ifdef CONFIG_PROC_FS
......@@ -1182,10 +1206,9 @@ static void compute_ascii (void)
factor = 'M';
size >>= 20 - PAGE_SHIFT;
}
sprintf
(ascii_buffer + ascii_buf_bytes,
"reg%02i: base=0x%05lx000 (%4liMB), size=%4i%cB: %s, count=%d\n",
i, base, base >> (20 - PAGE_SHIFT), size, factor,
sprintf (ascii_buffer + ascii_buf_bytes,
"reg%02i: base=0x%05Lx000 (%4iMB), size=%4i%cB: %s, count=%d\n",
i, base, (u32) base >> (20 - PAGE_SHIFT), size, factor,
attrib_to_str (type), usage_table[i]);
ascii_buf_bytes += strlen (ascii_buffer + ascii_buf_bytes);
}
......@@ -1213,8 +1236,12 @@ static void __init mtrr_setup (void)
if ((cpuid_eax (0x80000000) >= 0x80000008)) {
u32 phys_addr;
phys_addr = cpuid_eax (0x80000008) & 0xff;
size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1);
size_and_mask = ~size_or_mask & 0xfffffffffff00000;
size_or_mask = ~((1L << phys_addr) - 1);
/*
* top bits MBZ as it's beyond the addressable range.
* bottom bits MBZ as we don't care about lower 12 bits of addr.
*/
size_and_mask = (~size_or_mask) & 0x000ffffffffff000L;
}
printk ("mtrr: detected mtrr type: x86-64\n");
}
......
......@@ -24,7 +24,7 @@
#include <asm/mtrr.h>
#include <asm/mpspec.h>
unsigned int nmi_watchdog = NMI_NONE;
unsigned int nmi_watchdog = NMI_LOCAL_APIC;
static unsigned int nmi_hz = HZ;
unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
extern void show_registers(struct pt_regs *regs);
......@@ -43,22 +43,38 @@ extern void show_registers(struct pt_regs *regs);
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
/* Why is there no CPUID flag for this? */
static __init int cpu_has_lapic(void)
{
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
case X86_VENDOR_AMD:
return boot_cpu_data.x86 >= 6;
/* .... add more cpus here or find a different way to figure this out. */
default:
return 0;
}
}
int __init check_nmi_watchdog (void)
{
int counts[NR_CPUS];
int j, cpu;
int cpu;
if (nmi_watchdog == NMI_LOCAL_APIC && !cpu_has_lapic()) {
nmi_watchdog = NMI_NONE;
return -1;
}
printk(KERN_INFO "testing NMI watchdog ... ");
for (j = 0; j < NR_CPUS; ++j) {
cpu = cpu_logical_map(j);
for_each_cpu(cpu) {
counts[cpu] = cpu_pda[cpu].__nmi_count;
}
sti();
local_irq_enable();
mdelay((10*1000)/nmi_hz); // wait 10 ticks
for (j = 0; j < smp_num_cpus; j++) {
cpu = cpu_logical_map(j);
for_each_cpu(cpu) {
if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) {
printk("CPU#%d: NMI appears to be stuck (%d)!\n",
cpu,
......@@ -84,26 +100,6 @@ static int __init setup_nmi_watchdog(char *str)
if (nmi >= NMI_INVALID)
return 0;
if (nmi == NMI_NONE)
nmi_watchdog = nmi;
/*
* If any other x86 CPU has a local APIC, then
* please test the NMI stuff there and send me the
* missing bits. Right now Intel P6 and AMD K7 only.
*/
if ((nmi == NMI_LOCAL_APIC) &&
(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
(boot_cpu_data.x86 == 6))
nmi_watchdog = nmi;
if ((nmi == NMI_LOCAL_APIC) &&
(boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
(boot_cpu_data.x86 == 6))
nmi_watchdog = nmi;
/*
* We can enable the IO-APIC watchdog
* unconditionally.
*/
if (nmi == NMI_IO_APIC)
nmi_watchdog = nmi;
return 1;
}
......@@ -167,6 +163,8 @@ static void __pminit setup_k7_watchdog(void)
int i;
unsigned int evntsel;
/* XXX should check these in EFER */
nmi_perfctr_msr = MSR_K7_PERFCTR0;
for(i = 0; i < 4; ++i) {
......@@ -180,7 +178,7 @@ static void __pminit setup_k7_watchdog(void)
| K7_NMI_EVENT;
wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
printk(KERN_INFO "watchdog: setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
apic_write(APIC_LVTPC, APIC_DM_NMI);
evntsel |= K7_EVNTSEL_ENABLE;
......@@ -191,7 +189,11 @@ void __pminit setup_apic_nmi_watchdog (void)
{
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_AMD:
if (boot_cpu_data.x86 != 6)
if (boot_cpu_data.x86 < 6)
return;
/* Simics masquerades as AMD, but does not support
performance counters */
if (strstr(boot_cpu_data.x86_model_id, "Screwdriver"))
return;
setup_k7_watchdog();
break;
......@@ -230,7 +232,7 @@ void touch_nmi_watchdog (void)
* Just reset the alert counters, (other CPUs might be
* spinning on locks we hold):
*/
for (i = 0; i < smp_num_cpus; i++)
for (i = 0; i < NR_CPUS; i++)
alert_counter[i] = 0;
}
......@@ -243,8 +245,7 @@ void nmi_watchdog_tick (struct pt_regs * regs)
* smp_processor_id().
*/
int sum, cpu = smp_processor_id();
sum = apic_timer_irqs[cpu];
sum = read_pda(apic_timer_irqs);
if (last_irq_sums[cpu] == sum) {
/*
......
......@@ -56,6 +56,7 @@
#include <linux/irq.h>
asmlinkage extern void ret_from_fork(void);
int sys_arch_prctl(int code, unsigned long addr);
unsigned long kernel_thread_flags = CLONE_VM;
......@@ -163,9 +164,11 @@ void show_regs(struct pt_regs * regs)
unsigned int ds,cs,es;
printk("\n");
print_modules();
printk("Pid: %d, comm: %.20s %s\n", current->pid, current->comm, print_tainted());
printk("RIP: %04lx:[<%016lx>]\n", regs->cs & 0xffff, regs->rip);
printk("RSP: %016lx EFLAGS: %08lx\n", regs->rsp, regs->eflags);
printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
printk_address(regs->rip);
printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
regs->rax, regs->rbx, regs->rcx);
printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
......@@ -255,6 +258,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
if (rsp == ~0) {
childregs->rsp = (unsigned long)childregs;
}
p->user_tid = NULL;
p->thread.rsp = (unsigned long) childregs;
p->thread.rsp0 = (unsigned long) (childregs+1);
......@@ -281,6 +285,30 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
(IO_BITMAP_SIZE+1)*4);
}
/*
* Set a new TLS for the child thread?
*/
if (clone_flags & CLONE_SETTLS) {
struct n_desc_struct *desc;
struct user_desc info;
int idx;
if (copy_from_user(&info, test_thread_flag(TIF_IA32) ?
(void *)childregs->rsi :
(void *)childregs->rdx, sizeof(info)))
return -EFAULT;
if (LDT_empty(&info))
return -EINVAL;
idx = info.entry_number;
if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
desc = (struct n_desc_struct *)(p->thread.tls_array) + idx - GDT_ENTRY_TLS_MIN;
desc->a = LDT_entry_a(&info);
desc->b = LDT_entry_b(&info);
}
return 0;
}
......@@ -305,8 +333,8 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
struct tss_struct *tss = init_tss + smp_processor_id();
int cpu = smp_processor_id();
struct tss_struct *tss = init_tss + cpu;
unlazy_fpu(prev_p);
......@@ -317,6 +345,7 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Switch DS and ES.
* This won't pick up thread selector changes, but I guess that is ok.
*/
asm volatile("movl %%es,%0" : "=m" (prev->es));
if (unlikely(next->es | prev->es))
......@@ -329,25 +358,38 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Switch FS and GS.
* XXX Check if this is safe on SMP (!= -> |)
* Need to simplify this.
*/
{
unsigned int fsindex;
unsigned int gsindex;
asm volatile("movl %%fs,%0" : "=g" (fsindex));
if (unlikely(fsindex != next->fsindex)) /* or likely? */
loadsegment(fs, next->fsindex);
asm volatile("movl %%gs,%0" : "=g" (gsindex));
/*
* Load the per-thread Thread-Local Storage descriptor.
*/
if (load_TLS(next, cpu)) {
loadsegment(fs,next->fsindex);
/* should find a way to optimize this away - it is
slow */
goto loadgs;
} else {
if (fsindex != next->fsindex)
loadsegment(fs,next->fsindex);
if (gsindex != next->gsindex) {
loadgs:
load_gs_index(next->gsindex);
}
}
if (unlikely(fsindex != prev->fsindex))
prev->fs = 0;
if ((fsindex != prev->fsindex) || (prev->fs != next->fs))
wrmsrl(MSR_FS_BASE, next->fs);
prev->fsindex = fsindex;
}
{
unsigned int gsindex;
asm volatile("movl %%gs,%0" : "=g" (gsindex));
if (unlikely(gsindex != next->gsindex))
load_gs_index(next->gs);
if (unlikely(gsindex != prev->gsindex))
prev->gs = 0;
if (gsindex != prev->gsindex || prev->gs != next->gs)
......@@ -363,6 +405,7 @@ void __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
write_pda(pcurrent, next_p);
write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
/*
* Now maybe reload the debug registers
*/
......@@ -433,16 +476,16 @@ void set_personality_64bit(void)
asmlinkage long sys_fork(struct pt_regs regs)
{
struct task_struct *p;
p = do_fork(SIGCHLD, regs.rsp, &regs, 0);
p = do_fork(SIGCHLD, regs.rsp, &regs, 0, NULL);
return IS_ERR(p) ? PTR_ERR(p) : p->pid;
}
asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, struct pt_regs regs)
asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void *user_tid, struct pt_regs regs)
{
struct task_struct *p;
if (!newsp)
newsp = regs.rsp;
p = do_fork(clone_flags & ~CLONE_IDLETASK, newsp, &regs, 0);
p = do_fork(clone_flags & ~CLONE_IDLETASK, newsp, &regs, 0, user_tid);
return IS_ERR(p) ? PTR_ERR(p) : p->pid;
}
......@@ -459,7 +502,7 @@ asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, struct
asmlinkage long sys_vfork(struct pt_regs regs)
{
struct task_struct *p;
p = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.rsp, &regs, 0);
p = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.rsp, &regs, 0, NULL);
return IS_ERR(p) ? PTR_ERR(p) : p->pid;
}
......@@ -494,7 +537,7 @@ unsigned long get_wchan(struct task_struct *p)
#undef last_sched
#undef first_sched
asmlinkage int sys_arch_prctl(int code, unsigned long addr)
int sys_arch_prctl(int code, unsigned long addr)
{
int ret = 0;
unsigned long tmp;
......@@ -535,3 +578,115 @@ asmlinkage int sys_arch_prctl(int code, unsigned long addr)
return ret;
}
/*
* sys_alloc_thread_area: get a yet unused TLS descriptor index.
*/
static int get_free_idx(void)
{
struct thread_struct *t = &current->thread;
int idx;
for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
if (desc_empty((struct n_desc_struct *)(t->tls_array) + idx))
return idx + GDT_ENTRY_TLS_MIN;
return -ESRCH;
}
/*
* Set a given TLS descriptor:
* When you want addresses > 32bit use arch_prctl()
*/
asmlinkage int sys_set_thread_area(struct user_desc *u_info)
{
struct thread_struct *t = &current->thread;
struct user_desc info;
struct n_desc_struct *desc;
int cpu, idx;
if (copy_from_user(&info, u_info, sizeof(info)))
return -EFAULT;
idx = info.entry_number;
/*
* index -1 means the kernel should try to find and
* allocate an empty descriptor:
*/
if (idx == -1) {
idx = get_free_idx();
if (idx < 0)
return idx;
if (put_user(idx, &u_info->entry_number))
return -EFAULT;
}
if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
desc = ((struct n_desc_struct *)t->tls_array) + idx - GDT_ENTRY_TLS_MIN;
/*
* We must not get preempted while modifying the TLS.
*/
cpu = get_cpu();
if (LDT_empty(&info)) {
desc->a = 0;
desc->b = 0;
} else {
desc->a = LDT_entry_a(&info);
desc->b = LDT_entry_b(&info);
}
load_TLS(t, cpu);
put_cpu();
return 0;
}
/*
* Get the current Thread-Local Storage area:
*/
#define GET_BASE(desc) ( \
(((desc)->a >> 16) & 0x0000ffff) | \
(((desc)->b << 16) & 0x00ff0000) | \
( (desc)->b & 0xff000000) )
#define GET_LIMIT(desc) ( \
((desc)->a & 0x0ffff) | \
((desc)->b & 0xf0000) )
#define GET_32BIT(desc) (((desc)->b >> 23) & 1)
#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
asmlinkage int sys_get_thread_area(struct user_desc *u_info)
{
struct user_desc info;
struct n_desc_struct *desc;
int idx;
if (get_user(idx, &u_info->entry_number))
return -EFAULT;
if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
desc = ((struct n_desc_struct *)current->thread.tls_array) + idx - GDT_ENTRY_TLS_MIN;
memset(&info, 0, sizeof(struct user_desc));
info.entry_number = idx;
info.base_addr = GET_BASE(desc);
info.limit = GET_LIMIT(desc);
info.seg_32bit = GET_32BIT(desc);
info.contents = GET_CONTENTS(desc);
info.read_exec_only = !GET_WRITABLE(desc);
info.limit_in_pages = GET_LIMIT_PAGES(desc);
info.seg_not_present = !GET_PRESENT(desc);
info.useable = GET_USEABLE(desc);
if (copy_to_user(u_info, &info, sizeof(info)))
return -EFAULT;
return 0;
}
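
The GET_BASE()/GET_LIMIT() macros added above reassemble a segment base and limit that the descriptor format scatters across its two 32-bit words. As a quick sanity check, here is a small, hypothetical userspace program (not part of this patch) that packs a base and limit by hand into the same layout and confirms the macros' expressions round-trip them:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t base = 0x12345678, limit = 0xabcde;	/* arbitrary test values */

	/* descriptor word a: limit[15:0], then base[15:0] in the high half */
	uint32_t a = (limit & 0x0ffff) | ((base & 0x0000ffff) << 16);
	/* descriptor word b: base[23:16] in bits 0-7, limit[19:16] in bits 16-19,
	   base[31:24] in bits 24-31 (type/flag bits left zero here) */
	uint32_t b = ((base & 0x00ff0000) >> 16) | (limit & 0xf0000) | (base & 0xff000000);

	/* same expressions as GET_BASE() and GET_LIMIT() above */
	uint32_t got_base  = ((a >> 16) & 0x0000ffff) | ((b << 16) & 0x00ff0000) | (b & 0xff000000);
	uint32_t got_limit = (a & 0x0ffff) | (b & 0xf0000);

	printf("base  0x%08x -> 0x%08x\n", base, got_base);	/* both print 0x12345678 */
	printf("limit 0x%08x -> 0x%08x\n", limit, got_limit);	/* both print 0x000abcde */
	return 0;
}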
......@@ -15,6 +15,7 @@
#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/security.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
......@@ -111,6 +112,11 @@ static int putreg(struct task_struct *child,
return -EIO;
child->thread.es = value & 0xffff;
return 0;
case offsetof(struct user_regs_struct,ss):
if ((value & 3) != 3)
return -EIO;
value &= 0xffff;
return 0;
case offsetof(struct user_regs_struct,fs_base):
if (!((value >> 48) == 0 || (value >> 48) == 0xffff))
return -EIO;
......@@ -128,7 +134,7 @@ static int putreg(struct task_struct *child,
value |= tmp;
break;
case offsetof(struct user_regs_struct,cs):
if (value && (value & 3) != 3)
if ((value & 3) != 3)
return -EIO;
value &= 0xffff;
break;
......@@ -172,6 +178,9 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
/* are we already being traced? */
if (current->ptrace & PT_PTRACED)
goto out;
ret = security_ops->ptrace(current->parent, current);
if (ret)
goto out;
/* set the ptrace bit in the process flags. */
current->ptrace |= PT_PTRACED;
ret = 0;
......
......@@ -29,8 +29,8 @@
* needs to do something only if count was negative before
* the increment operation.
*
* "sleeping" and the contention routine ordering is
* protected by the semaphore spinlock.
* "sleeping" and the contention routine ordering is protected
* by the spinlock in the semaphore's waitqueue head.
*
* Note that these functions are only called when there is
* contention on the lock, and as such all this is the
......@@ -54,39 +54,41 @@ void __up(struct semaphore *sem)
wake_up(&sem->wait);
}
static spinlock_t semaphore_lock = SPIN_LOCK_UNLOCKED;
void __down(struct semaphore * sem)
{
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
unsigned long flags;
tsk->state = TASK_UNINTERRUPTIBLE;
add_wait_queue_exclusive(&sem->wait, &wait);
spin_lock_irqsave(&sem->wait.lock, flags);
add_wait_queue_exclusive_locked(&sem->wait, &wait);
spin_lock_irq(&semaphore_lock);
sem->sleepers++;
for (;;) {
int sleepers = sem->sleepers;
/*
* Add "everybody else" into it. They aren't
* playing, because we own the spinlock.
* playing, because we own the spinlock in
* the wait_queue_head.
*/
if (!atomic_add_negative(sleepers - 1, &sem->count)) {
sem->sleepers = 0;
break;
}
sem->sleepers = 1; /* us - see -1 above */
spin_unlock_irq(&semaphore_lock);
spin_unlock_irqrestore(&sem->wait.lock, flags);
schedule();
spin_lock_irqsave(&sem->wait.lock, flags);
tsk->state = TASK_UNINTERRUPTIBLE;
spin_lock_irq(&semaphore_lock);
}
spin_unlock_irq(&semaphore_lock);
remove_wait_queue(&sem->wait, &wait);
remove_wait_queue_locked(&sem->wait, &wait);
wake_up_locked(&sem->wait);
spin_unlock_irqrestore(&sem->wait.lock, flags);
tsk->state = TASK_RUNNING;
wake_up(&sem->wait);
}
int __down_interruptible(struct semaphore * sem)
......@@ -94,11 +96,13 @@ int __down_interruptible(struct semaphore * sem)
int retval = 0;
struct task_struct *tsk = current;
DECLARE_WAITQUEUE(wait, tsk);
unsigned long flags;
tsk->state = TASK_INTERRUPTIBLE;
add_wait_queue_exclusive(&sem->wait, &wait);
spin_lock_irqsave(&sem->wait.lock, flags);
add_wait_queue_exclusive_locked(&sem->wait, &wait);
spin_lock_irq(&semaphore_lock);
sem->sleepers ++;
sem->sleepers++;
for (;;) {
int sleepers = sem->sleepers;
......@@ -118,25 +122,27 @@ int __down_interruptible(struct semaphore * sem)
/*
* Add "everybody else" into it. They aren't
* playing, because we own the spinlock. The
* "-1" is because we're still hoping to get
* the lock.
* playing, because we own the spinlock in
* wait_queue_head. The "-1" is because we're
* still hoping to get the semaphore.
*/
if (!atomic_add_negative(sleepers - 1, &sem->count)) {
sem->sleepers = 0;
break;
}
sem->sleepers = 1; /* us - see -1 above */
spin_unlock_irq(&semaphore_lock);
spin_unlock_irqrestore(&sem->wait.lock, flags);
schedule();
spin_lock_irqsave(&sem->wait.lock, flags);
tsk->state = TASK_INTERRUPTIBLE;
spin_lock_irq(&semaphore_lock);
}
spin_unlock_irq(&semaphore_lock);
remove_wait_queue_locked(&sem->wait, &wait);
wake_up_locked(&sem->wait);
spin_unlock_irqrestore(&sem->wait.lock, flags);
tsk->state = TASK_RUNNING;
remove_wait_queue(&sem->wait, &wait);
wake_up(&sem->wait);
return retval;
}
......@@ -153,18 +159,20 @@ int __down_trylock(struct semaphore * sem)
int sleepers;
unsigned long flags;
spin_lock_irqsave(&semaphore_lock, flags);
spin_lock_irqsave(&sem->wait.lock, flags);
sleepers = sem->sleepers + 1;
sem->sleepers = 0;
/*
* Add "everybody else" and us into it. They aren't
* playing, because we own the spinlock.
* playing, because we own the spinlock in the
* wait_queue_head.
*/
if (!atomic_add_negative(sleepers, &sem->count))
wake_up(&sem->wait);
if (!atomic_add_negative(sleepers, &sem->count)) {
wake_up_locked(&sem->wait);
}
spin_unlock_irqrestore(&semaphore_lock, flags);
spin_unlock_irqrestore(&sem->wait.lock, flags);
return 1;
}
......
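The rewritten __down()/__down_interruptible() above keep the i386 "sleepers" bookkeeping but serialize it with the spinlock already embedded in sem->wait rather than the old global semaphore_lock. The arithmetic is easiest to follow with concrete numbers; here is a stand-alone user-space sketch (no waitqueue, no real sleeping, the holder's __up() simulated inline) of why adding "sleepers - 1" compensates for the extra decrement each sleeper already made in down():
/* User-space sketch of the sleepers accounting; not kernel code. */
#include <stdio.h>

static int count = 1, sleepers = 0;	/* count: 1 free, 0 held, <0 held with waiters */

static int add_negative(int i) { count += i; return count < 0; }

static void down_slow(const char *who)
{
	sleepers++;
	for (;;) {
		if (!add_negative(sleepers - 1)) {	/* fold the other sleepers back in */
			sleepers = 0;
			printf("%s: acquired, count=%d\n", who, count);
			return;
		}
		sleepers = 1;				/* "us - see -1 above" */
		printf("%s: sleeping, count=%d\n", who, count);
		count++;				/* simulate __up() by the current holder */
	}
}

int main(void)
{
	count--;		/* task A: fast path, 1 -> 0, acquired             */
	count--;		/* task B: 0 -> -1, contended, takes the slow path */
	down_slow("B");
	return 0;
}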
......@@ -50,11 +50,11 @@ sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize, struct pt_regs regs)
return -EFAULT;
sigdelsetmask(&newset, ~_BLOCKABLE);
spin_lock_irq(&current->sigmask_lock);
spin_lock_irq(&current->sig->siglock);
saveset = current->blocked;
current->blocked = newset;
recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock);
spin_unlock_irq(&current->sig->siglock);
#if DEBUG_SIG
printk("rt_sigsuspend savset(%lx) newset(%lx) regs(%p) rip(%lx)\n",
saveset, newset, &regs, regs.rip);
......@@ -153,10 +153,10 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs regs)
}
sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sigmask_lock);
spin_lock_irq(&current->sig->siglock);
current->blocked = set;
recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock);
spin_unlock_irq(&current->sig->siglock);
if (restore_sigcontext(&regs, &frame->uc.uc_mcontext, &eax)) {
goto badframe;
......@@ -390,11 +390,11 @@ handle_signal(unsigned long sig, siginfo_t *info, sigset_t *oldset,
ka->sa.sa_handler = SIG_DFL;
if (!(ka->sa.sa_flags & SA_NODEFER)) {
spin_lock_irq(&current->sigmask_lock);
spin_lock_irq(&current->sig->siglock);
sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
sigaddset(&current->blocked,sig);
recalc_sigpending();
spin_unlock_irq(&current->sigmask_lock);
spin_unlock_irq(&current->sig->siglock);
}
}
......
......@@ -15,12 +15,14 @@
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/smp_lock.h>
#include <linux/smp.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <asm/mtrr.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/hardirq.h>
/*
* the following functions deal with sending IPIs between CPUs.
......@@ -75,7 +77,7 @@ static inline void send_IPI_allbutself(int vector)
* we get an APIC send error if we try to broadcast.
* thus we have to avoid sending IPIs in this case.
*/
if (smp_num_cpus > 1)
if (num_online_cpus() > 1)
__send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
}
......@@ -224,7 +226,7 @@ asmlinkage void smp_invalidate_interrupt (void)
clear_bit(cpu, &flush_cpumask);
out:
put_cpu();
put_cpu_no_resched();
}
static void flush_tlb_others (unsigned long cpumask, struct mm_struct *mm,
......@@ -399,7 +401,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
*/
{
struct call_data_struct data;
int cpus = smp_num_cpus-1;
int cpus = num_online_cpus()-1;
if (!cpus)
return 0;
......@@ -448,7 +450,6 @@ static void stop_this_cpu (void * dummy)
void smp_send_stop(void)
{
smp_call_function(stop_this_cpu, NULL, 1, 0);
smp_num_cpus = 1;
local_irq_disable();
disable_local_APIC();
......@@ -481,7 +482,9 @@ asmlinkage void smp_call_function_interrupt(void)
/*
* At this point the info structure may be out of scope unless wait==1
*/
irq_enter();
(*func)(info);
irq_exit();
if (wait) {
mb();
atomic_inc(&call_data->finished);
......
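The irq_enter()/irq_exit() pair added above makes the cross-CPU callback run in a proper interrupt context under the new preempt_count scheme, and the "out of scope" comment is about lifetime: call_data is a local on the calling CPU's stack. A stand-alone sketch of that handshake using C11 atomics; the started/finished pair follows the i386 layout of the era (only "finished" is visible in this hunk, so treat the exact fields as an assumption):
#include <stdatomic.h>

struct call_data {
	void (*func)(void *);
	void *info;
	atomic_int started, finished;
	int wait;
};

/* Caller side (simplified): data lives on the caller's stack. */
static void caller_side(struct call_data *data, int cpus)
{
	/* ...publish data, send the IPI to the other CPUs, then: */
	while (atomic_load(&data->started) != cpus)
		;				/* data/info must stay valid until here */
	if (data->wait)
		while (atomic_load(&data->finished) != cpus)
			;			/* ...and until here when wait == 1     */
}

/* Interrupt side: copy everything out before acknowledging "started". */
static void ipi_handler_side(struct call_data *data)
{
	void (*func)(void *) = data->func;
	void *info = data->info;
	int wait = data->wait;

	atomic_fetch_add(&data->started, 1);	/* after this, data may vanish if !wait */
	func(info);				/* the kernel brackets this with irq_enter()/irq_exit() */
	if (wait)
		atomic_fetch_add(&data->finished, 1);
}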
......@@ -33,7 +33,7 @@
ENTRY(trampoline_data)
r_base = .
wbinvd
mov %cs, %ax # Code and data in the same place
mov %ax, %ds
......
......@@ -36,6 +36,7 @@
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/kdebug.h>
#include <asm/processor.h>
#include <asm/smp.h>
#include <asm/pgalloc.h>
......@@ -47,11 +48,6 @@ asmlinkage int system_call(void);
asmlinkage int kernel_syscall(void);
extern void ia32_syscall(void);
char doublefault_stack[4*1024];
#ifndef CONFIG_SMP
char stackfault_stack[4*1024];
#endif
extern struct gate_struct idt_table[256];
asmlinkage void divide_error(void);
......@@ -101,7 +97,7 @@ int printk_address(unsigned long address)
}
if (!strcmp(modname, "kernel"))
modname = delim = "";
return printk("[%016lx%s%s%s%s%+ld]",
return printk("<%016lx>{%s%s%s%s%+ld}",
address,delim,modname,delim,symname,address-symstart);
}
#else
......@@ -114,7 +110,6 @@ int printk_address(unsigned long address)
#ifdef CONFIG_MODULES
extern struct module *module_list;
extern struct module kernel_module;
static inline int kernel_text_address(unsigned long addr)
......@@ -149,30 +144,59 @@ static inline int kernel_text_address(unsigned long addr)
#endif
static inline unsigned long *in_exception_stack(int cpu, unsigned long stack)
{
int k;
for (k = 0; k < N_EXCEPTION_STACKS; k++) {
unsigned long end = init_tss[cpu].ist[k] + EXCEPTION_STKSZ;
if (stack >= init_tss[cpu].ist[k] && stack <= end)
return (unsigned long *)end;
}
return 0;
}
/*
* These constants are for searching for possible module text
* segments. MODULE_RANGE is a guess of how much space is likely
* to be vmalloced.
* x86-64 can have up to three kernel stacks:
* process stack
* interrupt stack
* severe exception (double fault, nmi, stack fault) hardware stack
* Check and process them in order.
*/
#define MODULE_RANGE (8*1024*1024)
void show_trace(unsigned long *stack)
{
unsigned long addr;
unsigned long *irqstack, *irqstack_end;
unsigned long *irqstack, *irqstack_end, *estack_end;
/* FIXME: should read the CPU id from the APIC so this still works with a bogus %gs */
const int cpu = smp_processor_id();
int i;
printk("\nCall Trace: ");
printk("\nCall Trace:");
i = 0;
estack_end = in_exception_stack(cpu, (unsigned long)stack);
if (estack_end) {
while (stack < estack_end) {
addr = *stack++;
if (kernel_text_address(addr)) {
i += printk_address(addr);
i += printk(" ");
if (i > 50) {
printk("\n");
i = 0;
}
}
}
i += printk(" <EOE> ");
i += 7;
stack = (unsigned long *) estack_end[-2];
}
irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr);
irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE + 64);
i = 1;
if (stack >= irqstack && stack < irqstack_end) {
unsigned long *tstack;
printk("<IRQ> ");
while (stack < irqstack_end) {
addr = *stack++;
......@@ -195,13 +219,7 @@ void show_trace(unsigned long *stack)
}
stack = (unsigned long *) (irqstack_end[-1]);
printk(" <EOI> ");
#if 1
tstack = (unsigned long *)(current_thread_info()+1);
if (stack < tstack || (char*)stack > (char*)tstack+THREAD_SIZE)
printk("\n" KERN_DEBUG
"no stack at the end of irqstack; stack:%p, curstack %p\n",
stack, tstack);
#endif
i += 7;
}
while (((long) stack & (THREAD_SIZE-1)) != 0) {
......@@ -260,6 +278,15 @@ void show_stack(unsigned long * rsp)
show_trace((unsigned long *)rsp);
}
/*
* The architecture-independent dump_stack generator
*/
void dump_stack(void)
{
unsigned long dummy;
show_stack(&dummy);
}
void show_registers(struct pt_regs *regs)
{
int i;
......@@ -322,6 +349,7 @@ void handle_BUG(struct pt_regs *regs)
return;
if (__get_user(tmp, f.filename))
f.filename = "unmapped filename";
printk("----------- [cut here ] --------- [please bite here ] ---------\n");
printk("Kernel BUG at %.50s:%d\n", f.filename, f.line);
}
......@@ -377,6 +405,19 @@ static inline unsigned long get_cr2(void)
static void do_trap(int trapnr, int signr, char *str,
struct pt_regs * regs, long error_code, siginfo_t *info)
{
#ifdef CONFIG_CHECKING
{
unsigned long gs;
struct x8664_pda *pda = cpu_pda + stack_smp_processor_id();
rdmsrl(MSR_GS_BASE, gs);
if (gs != (unsigned long)pda) {
wrmsrl(MSR_GS_BASE, pda);
printk("%s: wrong gs %lx expected %p rip %lx\n", str, gs, pda,
regs->rip);
}
}
#endif
if ((regs->cs & 3) != 0) {
struct task_struct *tsk = current;
......@@ -452,6 +493,18 @@ extern void dump_pagetable(unsigned long);
asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
{
#ifdef CONFIG_CHECKING
{
unsigned long gs;
struct x8664_pda *pda = cpu_pda + stack_smp_processor_id();
rdmsrl(MSR_GS_BASE, gs);
if (gs != (unsigned long)pda) {
wrmsrl(MSR_GS_BASE, pda);
printk("general protection handler: wrong gs %lx expected %p\n", gs, pda);
}
}
#endif
if ((regs->cs & 3)!=0) {
struct task_struct *tsk = current;
if (exception_trace)
......@@ -501,8 +554,7 @@ static void io_check_error(unsigned char reason, struct pt_regs * regs)
}
static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
{
printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
{ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
printk("Dazed and confused, but trying to continue\n");
printk("Do you have a strange power saving mode enabled?\n");
}
......@@ -532,6 +584,7 @@ asmlinkage void do_nmi(struct pt_regs * regs)
mem_parity_error(reason, regs);
if (reason & 0x40)
io_check_error(reason, regs);
/*
* Reassert NMI in case it became active meanwhile
* as it's edge-triggered.
......@@ -548,6 +601,18 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code)
struct task_struct *tsk = current;
siginfo_t info;
#ifdef CONFIG_CHECKING
{
unsigned long gs;
struct x8664_pda *pda = cpu_pda + stack_smp_processor_id();
rdmsrl(MSR_GS_BASE, gs);
if (gs != (unsigned long)pda) {
wrmsrl(MSR_GS_BASE, pda);
printk("debug handler: wrong gs %lx expected %p\n", gs, pda);
}
}
#endif
asm("movq %%db6,%0" : "=r" (condition));
if (notify_die(DIE_DEBUG, "debug", regs, error_code) == NOTIFY_BAD)
......
......@@ -47,6 +47,7 @@ extern unsigned long get_cmos_time(void);
EXPORT_SYMBOL(boot_cpu_data);
EXPORT_SYMBOL(dump_fpu);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(ioremap_nocache);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(enable_irq);
EXPORT_SYMBOL(disable_irq);
......@@ -109,18 +110,11 @@ EXPORT_SYMBOL(mmx_copy_page);
#ifdef CONFIG_SMP
EXPORT_SYMBOL(cpu_data);
EXPORT_SYMBOL(smp_num_cpus);
EXPORT_SYMBOL(cpu_online_map);
EXPORT_SYMBOL_NOVERS(__write_lock_failed);
EXPORT_SYMBOL_NOVERS(__read_lock_failed);
/* Global SMP irq stuff */
EXPORT_SYMBOL(synchronize_irq);
EXPORT_SYMBOL(global_irq_holder);
EXPORT_SYMBOL(__global_cli);
EXPORT_SYMBOL(__global_sti);
EXPORT_SYMBOL(__global_save_flags);
EXPORT_SYMBOL(__global_restore_flags);
EXPORT_SYMBOL(smp_call_function);
#endif
......
......@@ -55,17 +55,17 @@ SECTIONS
. = ALIGN(64);
.vxtime_sequence : AT ((LOADADDR(.vsyscall_0) + SIZEOF(.vsyscall_0) + 63) & ~(63)) { *(.vxtime_sequence) }
vxtime_sequence = LOADADDR(.vxtime_sequence);
.last_tsc_low : AT (LOADADDR(.vxtime_sequence) + SIZEOF(.vxtime_sequence)) { *(.last_tsc_low) }
last_tsc_low = LOADADDR(.last_tsc_low);
.delay_at_last_interrupt : AT (LOADADDR(.last_tsc_low) + SIZEOF(.last_tsc_low)) { *(.delay_at_last_interrupt) }
delay_at_last_interrupt = LOADADDR(.delay_at_last_interrupt);
.fast_gettimeoffset_quotient : AT (LOADADDR(.delay_at_last_interrupt) + SIZEOF(.delay_at_last_interrupt)) { *(.fast_gettimeoffset_quotient) }
fast_gettimeoffset_quotient = LOADADDR(.fast_gettimeoffset_quotient);
.wall_jiffies : AT (LOADADDR(.fast_gettimeoffset_quotient) + SIZEOF(.fast_gettimeoffset_quotient)) { *(.wall_jiffies) }
. = ALIGN(16);
.hpet : AT ((LOADADDR(.vxtime_sequence) + SIZEOF(.vxtime_sequence) + 15) & ~(15)) { *(.hpet) }
hpet = LOADADDR(.hpet);
. = ALIGN(16);
.wall_jiffies : AT ((LOADADDR(.hpet) + SIZEOF(.hpet) + 15) & ~(15)) { *(.wall_jiffies) }
wall_jiffies = LOADADDR(.wall_jiffies);
.sys_tz : AT (LOADADDR(.wall_jiffies) + SIZEOF(.wall_jiffies)) { *(.sys_tz) }
. = ALIGN(16);
.sys_tz : AT ((LOADADDR(.wall_jiffies) + SIZEOF(.wall_jiffies) + 15) & ~(15)) { *(.sys_tz) }
sys_tz = LOADADDR(.sys_tz);
.jiffies : AT (LOADADDR(.sys_tz) + SIZEOF(.sys_tz)) { *(.jiffies) }
. = ALIGN(16);
.jiffies : AT ((LOADADDR(.sys_tz) + SIZEOF(.sys_tz) + 15) & ~(15)) { *(.jiffies) }
jiffies = LOADADDR(.jiffies);
. = ALIGN(16);
.xtime : AT ((LOADADDR(.jiffies) + SIZEOF(.jiffies) + 15) & ~(15)) { *(.xtime) }
......@@ -100,7 +100,6 @@ SECTIONS
__initcall_end = .;
. = ALIGN(32);
__per_cpu_start = .;
. = ALIGN(64);
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;
. = ALIGN(4096);
......
......@@ -11,8 +11,8 @@
*/
#define map_page_into_agp(page) \
change_page_attr(page, __pgprot(__PAGE_KERNEL | _PAGE_PCD))
#define unmap_page_from_agp(page) change_page_attr(page, PAGE_KERNEL)
change_page_attr(page, 1, PAGE_KERNEL_NOCACHE)
#define unmap_page_from_agp(page) change_page_attr(page, 1, PAGE_KERNEL)
#define flush_agp_mappings() global_flush_tlb()
/* Could use CLFLUSH here if the cpu supports it. But then it would
......
......@@ -9,7 +9,7 @@
#ifdef CONFIG_X86_LOCAL_APIC
#define APIC_DEBUG 0
#define APIC_DEBUG 1
#if APIC_DEBUG
#define Dprintk(x...) printk(x)
......@@ -17,6 +17,8 @@
#define Dprintk(x...)
#endif
struct pt_regs;
/*
* Basic functions accessing APICs.
*/
......@@ -70,7 +72,8 @@ extern void init_bsp_APIC (void);
extern void setup_local_APIC (void);
extern void init_apic_mappings (void);
extern void smp_local_timer_interrupt (struct pt_regs * regs);
extern void setup_APIC_clocks (void);
extern void setup_boot_APIC_clock (void);
extern void setup_secondary_APIC_clock (void);
extern void setup_apic_nmi_watchdog (void);
extern inline void nmi_watchdog_tick (struct pt_regs * regs);
extern int APIC_init_uniprocessor (void);
......@@ -80,7 +83,6 @@ extern void enable_APIC_timer(void);
extern struct pm_dev *apic_pm_register(pm_dev_t, unsigned long, pm_callback);
extern void apic_pm_unregister(struct pm_dev*);
extern unsigned int apic_timer_irqs [NR_CPUS];
extern int check_nmi_watchdog (void);
extern unsigned int nmi_watchdog;
......
#ifndef _I386_CACHEFLUSH_H
#define _I386_CACHEFLUSH_H
#ifndef _X8664_CACHEFLUSH_H
#define _X8664_CACHEFLUSH_H
/* Keep includes the same across arches. */
#include <linux/mm.h>
......@@ -18,4 +18,4 @@
void global_flush_tlb(void);
int change_page_attr(struct page *page, int numpages, pgprot_t prot);
#endif /* _I386_CACHEFLUSH_H */
#endif /* _X8664_CACHEFLUSH_H */
......@@ -117,7 +117,7 @@
subq $6*8,%rsp
movq %rax,5*8(%rsp) /* ss */
movq %rax,4*8(%rsp) /* rsp */
movq %rax,3*8(%rsp) /* eflags */
movq $(1<<9),3*8(%rsp) /* eflags */
movq $__KERNEL_CS,2*8(%rsp) /* cs */
movq \child_rip,1*8(%rsp) /* rip */
movq %rax,(%rsp) /* orig_rax */
......
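The only functional change in the hunk above is the saved flags slot: instead of reusing %rax it stores (1 << 9), the IF (interrupt enable) bit of RFLAGS, so a freshly created kernel thread starts with interrupts on. For reference, the six quadwords pushed by the macro form an interrupt-style frame laid out like this (sketch of the layout only):
struct fake_thread_frame {		/* lowest slot at (%rsp)                        */
	unsigned long orig_rax;		/* 0*8: scratch                                 */
	unsigned long rip;		/* 1*8: child_rip, where the thread starts      */
	unsigned long cs;		/* 2*8: __KERNEL_CS                             */
	unsigned long eflags;		/* 3*8: 1 << 9 == RFLAGS.IF, interrupts enabled */
	unsigned long rsp;		/* 4*8: initial stack pointer                   */
	unsigned long ss;		/* 5*8: stack segment                           */
};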
......@@ -80,6 +80,6 @@
#define cpu_has_k6_mtrr 0
#define cpu_has_cyrix_arr 0
#define cpu_has_centaur_mcr 0
#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH)
#endif /* __ASM_X8664_CPUFEATURE_H */
......@@ -7,16 +7,21 @@
#ifndef __ASSEMBLY__
/* Keep this synchronized with kernel/head.S */
#define TSS_START (8 * 8)
#define LDT_START (TSS_START + 16)
#include <asm/segment.h>
#define __TSS(n) (TSS_START + (n)*64)
#define __LDT(n) (LDT_START + (n)*64)
// 8 byte segment descriptor
struct desc_struct {
u16 limit0;
u16 base0;
unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1;
unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8;
} __attribute__((packed));
extern __u8 tss_start[];
extern __u8 gdt_table[];
extern __u8 gdt_end[];
struct n_desc_struct {
unsigned int a,b;
};
extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
enum {
GATE_INTERRUPT = 0xE,
......@@ -38,14 +43,6 @@ struct gate_struct {
#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF)
#define PTR_HIGH(x) ((unsigned long)(x) >> 32)
// 8 byte segment descriptor
struct desc_struct {
u16 limit0;
u16 base0;
unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1;
unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8;
} __attribute__((packed));
enum {
DESC_TSS = 0x9,
DESC_LDT = 0x2,
......@@ -66,9 +63,9 @@ struct desc_ptr {
unsigned long address;
} __attribute__((packed)) ;
#define load_TR(n) asm volatile("ltr %w0"::"r" (__TSS(n)))
#define __load_LDT(n) asm volatile("lldt %w0"::"r" (__LDT(n)))
#define clear_LDT(n) asm volatile("lldt %w0"::"r" (0))
#define load_TR_desc() asm volatile("ltr %w0"::"r" (GDT_ENTRY_TSS*8))
#define load_LDT_desc() asm volatile("lldt %w0"::"r" (GDT_ENTRY_LDT*8))
#define clear_LDT() asm volatile("lldt %w0"::"r" (0))
/*
* This is the ldt that every process will get unless we need
......@@ -125,34 +122,95 @@ static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned
memcpy(ptr, &d, 16);
}
static inline void set_tss_desc(unsigned n, void *addr)
static inline void set_tss_desc(unsigned cpu, void *addr)
{
set_tssldt_descriptor((__u8*)gdt_table + __TSS(n), (unsigned long)addr,
set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_TSS], (unsigned long)addr,
DESC_TSS,
sizeof(struct tss_struct));
}
static inline void set_ldt_desc(unsigned n, void *addr, int size)
static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
{
set_tssldt_descriptor((__u8*)gdt_table + __LDT(n), (unsigned long)addr,
set_tssldt_descriptor(&cpu_gdt_table[cpu][GDT_ENTRY_LDT], (unsigned long)addr,
DESC_LDT, size);
}
#define LDT_entry_a(info) \
((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
#define LDT_entry_b(info) \
(((info)->base_addr & 0xff000000) | \
(((info)->base_addr & 0x00ff0000) >> 16) | \
((info)->limit & 0xf0000) | \
(((info)->read_exec_only ^ 1) << 9) | \
((info)->contents << 10) | \
(((info)->seg_not_present ^ 1) << 15) | \
((info)->seg_32bit << 22) | \
((info)->limit_in_pages << 23) | \
((info)->useable << 20) | \
((info)->lm << 21) | \
0x7000)
#define LDT_empty(info) (\
(info)->base_addr == 0 && \
(info)->limit == 0 && \
(info)->contents == 0 && \
(info)->read_exec_only == 1 && \
(info)->seg_32bit == 0 && \
(info)->limit_in_pages == 0 && \
(info)->seg_not_present == 1 && \
(info)->useable == 0 && \
(info)->lm == 0)
#if TLS_SIZE != 24
# error update this code.
#endif
static inline u64 load_TLS(struct thread_struct *t, int cpu)
{
u64 *p, old, new, change;
union u {
struct desc_struct d;
u64 i;
};
change = 0;
/* check assembly! */
#define C(i) \
p = ((u64 *)cpu_gdt_table[cpu]) + GDT_ENTRY_TLS_MIN + i; \
old = *p; \
new = t->tls_array[i]; \
change |= old - new; \
*p = new;
C(0); C(1); C(2);
return change;
}
#undef C
/*
* load one particular LDT into the current CPU
*/
extern inline void load_LDT (mm_context_t *pc)
extern inline void load_LDT_nolock (mm_context_t *pc, int cpu)
{
int cpu = smp_processor_id();
int count = pc->size;
if (!count) {
clear_LDT(cpu);
if (likely(!count)) {
clear_LDT();
return;
}
set_ldt_desc(cpu, pc->ldt, count);
__load_LDT(cpu);
load_LDT_desc();
}
static inline void load_LDT(mm_context_t *pc)
{
int cpu = get_cpu();
load_LDT_nolock(pc, cpu);
put_cpu();
}
#endif /* !__ASSEMBLY__ */
......
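load_TLS() above copies the three cached TLS descriptors from the thread into this CPU's GDT and accumulates old - new across the slots, so its return value is zero exactly when nothing was rewritten. A hedged usage sketch; whether the context-switch path really keys segment reloads off this value is an assumption here, and reload_tls_segments() is a hypothetical helper:
/* Sketch only, not the real __switch_to(). */
static inline void switch_tls(struct thread_struct *next, int cpu)
{
	u64 changed = load_TLS(next, cpu);	/* 0 => all three GDT slots identical */

	if (changed)
		reload_tls_segments();		/* hypothetical: refresh selectors that may
						   still cache the old descriptors */
}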
......@@ -20,14 +20,14 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
* This is used to ensure we don't load something for the wrong architecture.
*/
#define elf_check_arch(x) \
((x)->e_machine == EM_X8664)
((x)->e_machine == EM_X86_64)
/*
* These are used to set parameters in the core dumps.
*/
#define ELF_CLASS ELFCLASS64
#define ELF_DATA ELFDATA2LSB
#define ELF_ARCH EM_X8664
#define ELF_ARCH EM_X86_64
/* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program starts %edx
contains a pointer to a function which might be registered using `atexit'.
......
......@@ -19,75 +19,84 @@ typedef struct {
#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
/*
* Are we in an interrupt context? Either doing bottom half
* or hardware interrupt processing?
* We put the hardirq and softirq counter into the preemption
* counter. The bitmask has the following meaning:
*
* - bits 0-7 are the preemption count (max preemption depth: 256)
* - bits 8-15 are the softirq count (max # of softirqs: 256)
* - bits 16-23 are the hardirq count (max # of hardirqs: 256)
*
* - ( bit 26 is the PREEMPT_ACTIVE flag. )
*
* PREEMPT_MASK: 0x000000ff
* SOFTIRQ_MASK: 0x0000ff00
* HARDIRQ_MASK: 0x00ff0000
*/
#define in_interrupt() \
((read_pda(__local_irq_count) + read_pda(__local_bh_count)) != 0)
#define in_irq() (read_pda(__local_irq_count) != 0)
#ifndef CONFIG_SMP
#define hardirq_trylock(cpu) (local_irq_count() == 0)
#define hardirq_endlock(cpu) do { } while (0)
#define irq_enter(cpu, irq) (local_irq_count()++)
#define irq_exit(cpu, irq) (local_irq_count()--)
#define synchronize_irq() barrier()
#define release_irqlock(cpu) do { } while (0)
#else
#define PREEMPT_BITS 8
#define SOFTIRQ_BITS 8
#define HARDIRQ_BITS 8
#include <asm/atomic.h>
#include <asm/smp.h>
#define PREEMPT_SHIFT 0
#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
extern unsigned char global_irq_holder;
extern unsigned volatile long global_irq_lock; /* long for set_bit -RR */
#define __MASK(x) ((1UL << (x))-1)
static inline int irqs_running (void)
{
int i;
#define PREEMPT_MASK (__MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
#define HARDIRQ_MASK (__MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
#define SOFTIRQ_MASK (__MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
for (i = 0; i < smp_num_cpus; i++)
if (read_pda(__local_irq_count))
return 1;
return 0;
}
#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK))
static inline void release_irqlock(int cpu)
{
/* if we didn't own the irq lock, just ignore.. */
if (global_irq_holder == (unsigned char) cpu) {
global_irq_holder = NO_PROC_ID;
clear_bit(0,&global_irq_lock);
}
}
#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
static inline void irq_enter(int cpu, int irq)
{
add_pda(__local_irq_count, 1);
/*
* The hardirq mask has to be large enough to have
* space for potentially all IRQ sources in the system
* nesting on a single CPU:
*/
#if (1 << HARDIRQ_BITS) < NR_IRQS
# error HARDIRQ_BITS is too low!
#endif
while (test_bit(0,&global_irq_lock)) {
cpu_relax();
}
}
/*
* Are we doing bottom half or hardware interrupt processing?
* Are we in a softirq context? Interrupt context?
*/
#define in_irq() (hardirq_count())
#define in_softirq() (softirq_count())
#define in_interrupt() (irq_count())
static inline void irq_exit(int cpu, int irq)
{
sub_pda(__local_irq_count, 1);
}
static inline int hardirq_trylock(int cpu)
{
return !read_pda(__local_irq_count) && !test_bit(0,&global_irq_lock);
}
#define hardirq_trylock() (!in_interrupt())
#define hardirq_endlock() do { } while (0)
#define hardirq_endlock(cpu) do { } while (0)
#define irq_enter() (preempt_count() += HARDIRQ_OFFSET)
extern void synchronize_irq(void);
#if CONFIG_PREEMPT
# define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != kernel_locked())
# define IRQ_EXIT_OFFSET (HARDIRQ_OFFSET-1)
#else
# define in_atomic() (preempt_count() != 0)
# define IRQ_EXIT_OFFSET HARDIRQ_OFFSET
#endif
#define irq_exit() \
do { \
preempt_count() -= IRQ_EXIT_OFFSET; \
if (!in_interrupt() && softirq_pending(smp_processor_id())) \
do_softirq(); \
preempt_enable_no_resched(); \
} while (0)
#ifndef CONFIG_SMP
# define synchronize_irq(irq) barrier()
#else
extern void synchronize_irq(unsigned int irq);
#endif /* CONFIG_SMP */
#endif /* __ASM_HARDIRQ_H */
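The comment block above packs three counters into a single preempt_count word; decoding a concrete value makes the layout and the in_interrupt() test concrete. A stand-alone example using the same shifts and 8-bit fields:
#include <stdio.h>

#define PREEMPT_SHIFT	0
#define SOFTIRQ_SHIFT	8
#define HARDIRQ_SHIFT	16
#define FIELD(v, shift)	(((v) >> (shift)) & 0xff)

int main(void)
{
	unsigned long v = 0x00010102;	/* one hardirq, one softirq, preempt depth 2 */

	printf("preempt=%lu softirq=%lu hardirq=%lu in_interrupt=%d\n",
	       FIELD(v, PREEMPT_SHIFT), FIELD(v, SOFTIRQ_SHIFT),
	       FIELD(v, HARDIRQ_SHIFT), (v & 0x00ffff00UL) != 0);
	return 0;
}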
......@@ -72,7 +72,7 @@ static __inline__ void ide_init_default_hwifs(void)
for(index = 0; index < MAX_HWIFS; index++) {
ide_init_hwif_ports(&hw, ide_default_io_base(index), 0, NULL);
hw.irq = ide_default_irq(ide_default_io_base(index));
ide_register_hw(&hw);
ide_register_hw(&hw, NULL);
}
#endif
}
......
......@@ -149,11 +149,7 @@ extern inline void * ioremap (unsigned long offset, unsigned long size)
* it's useful if some control registers are in such an area and write combining
* or read caching is not desirable:
*/
extern inline void * ioremap_nocache (unsigned long offset, unsigned long size)
{
return __ioremap(offset, size, _PAGE_PCD);
}
extern void * ioremap_nocache (unsigned long offset, unsigned long size);
extern void iounmap(void *addr);
/*
......
#ifndef __i386_IPC_H__
#define __i386_IPC_H__
#ifndef __x86_64_IPC_H__
#define __x86_64_IPC_H__
/* dummy */
......
......@@ -9,6 +9,8 @@ enum km_type {
KM_USER1,
KM_BIO_SRC_IRQ,
KM_BIO_DST_IRQ,
KM_IRQ0,
KM_IRQ1,
KM_TYPE_NR
};
......
......@@ -15,7 +15,7 @@
/* Note: on 64bit, base and limit are ignored and you cannot set
   DS/ES/CS to anything other than the default values if you still want to
   do syscalls. This call is therefore mostly useful for 32bit mode. */
struct modify_ldt_ldt_s {
struct user_desc {
unsigned int entry_number;
unsigned int base_addr;
unsigned int limit;
......
......@@ -5,7 +5,7 @@
#define PROT_WRITE 0x2 /* page can be written */
#define PROT_EXEC 0x4 /* page can be executed */
#define PROT_NONE 0x0 /* page can not be accessed */
#define PROT_SEM 0x0
#define PROT_SEM 0x8
#define MAP_SHARED 0x01 /* Share changes */
#define MAP_PRIVATE 0x02 /* Changes are private */
......
......@@ -43,8 +43,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
*read_pda(level4_pgt) = __pa(next->pgd) | _PAGE_TABLE;
__flush_tlb();
if (next->context.size + prev->context.size)
load_LDT(&next->context);
if (unlikely(next->context.ldt != prev->context.ldt))
load_LDT_nolock(&next->context, cpu);
}
#ifdef CONFIG_SMP
else {
......@@ -56,7 +56,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
* tlb flush IPI delivery. We must flush our tlb.
*/
local_flush_tlb();
load_LDT(&next->context);
load_LDT_nolock(&next->context, cpu);
}
}
#endif
......
......@@ -13,6 +13,9 @@
#define THREAD_SIZE (2*PAGE_SIZE)
#define CURRENT_MASK (~(THREAD_SIZE-1))
#define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
#define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
......@@ -56,6 +59,9 @@ typedef struct { unsigned long pgprot; } pgprot_t;
#define __PAGE_OFFSET 0x0000010000000000
#define __PHYSICAL_MASK 0x000000ffffffffff
#define KERNEL_TEXT_SIZE (40UL*1024*1024)
#define KERNEL_TEXT_START 0xffffffff80000000UL
#ifndef __ASSEMBLY__
#include <linux/stringify.h>
......@@ -108,7 +114,7 @@ extern __inline__ int get_order(unsigned long size)
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define pfn_valid(pfn) ((pfn) < max_mapnr)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
......
#ifndef _ASMx86_64_PARAM_H
#define _ASMx86_64_PARAM_H
#ifdef __KERNEL__
# define HZ 100 /* Internal kernel timer frequency */
# define USER_HZ 100 /* .. some user interfaces are in "ticks" */
#define CLOCKS_PER_SEC (USER_HZ) /* like times() */
#endif
#ifndef HZ
#define HZ 100
#endif
......@@ -17,8 +23,4 @@
#define MAXHOSTNAMELEN 64 /* max length of hostname */
#ifdef __KERNEL__
# define CLOCKS_PER_SEC 100 /* frequency at which times() counts */
#endif
#endif
......@@ -18,12 +18,11 @@ struct x8664_pda {
char *irqstackptr; /* top of irqstack */
unsigned long volatile *level4_pgt;
unsigned int __softirq_pending;
unsigned int __local_irq_count;
unsigned int __local_bh_count;
unsigned int __nmi_count; /* arch dependent */
struct task_struct * __ksoftirqd_task; /* waitqueue is too large */
struct mm_struct *active_mm;
int mmu_state;
unsigned apic_timer_irqs;
} ____cacheline_aligned;
#define PDA_STACKOFFSET (5*8)
......
#ifndef __ARCH_X8664_PERCPU__
#define __ARCH_X8664_PERCPU__
#ifndef _ASM_X8664_PERCPU_H_
#define _ASM_X8664_PERCPU_H_
#include <linux/compiler.h>
#include <linux/config.h>
#ifdef CONFIG_SMP
#include <asm/pda.h>
extern unsigned long __per_cpu_offset[NR_CPUS];
/* Separate out the type, so (int[3], foo) works. */
#ifndef MODULE
#define DEFINE_PER_CPU(type, name) \
__attribute__((__section__(".percpu"))) __typeof__(type) name##__per_cpu
#endif
/* Completely hide the relocation from the compiler to avoid problems with
the optimizer */
#define __per_cpu(offset,base) \
({ typeof(base) ptr = (void *)base; \
asm("addq %1,%0" : "=r" (ptr) : "r" (offset), "0" (ptr)); ptr; })
/* var is in discarded region: offset to particular copy we want */
#define __get_cpu_var(var) (*RELOC_HIDE(&var, read_pda(cpudata_offset)))
#define per_cpu(var, cpu) (*RELOC_HIDE(&var, per_cpu_pda[cpu]))
void setup_per_cpu_areas(void);
#define per_cpu(var,cpu) (*__per_cpu(__per_cpu_offset[cpu], &var##__per_cpu))
#define __get_cpu_var(var) (*__per_cpu(read_pda(cpudata_offset), &var##__per_cpu))
#else /* ! SMP */
/* Can't define per-cpu variables in modules. Sorry --RR */
#ifndef MODULE
#define DEFINE_PER_CPU(type, name) \
__typeof__(type) name##__per_cpu
#endif
#define per_cpu(var, cpu) var##__per_cpu
#define __get_cpu_var(var) var##__per_cpu
#endif
#define DECLARE_PER_CPU(type, name) extern __typeof__(type) name##__per_cpu
extern void setup_per_cpu_areas(void);
#endif /* __ARCH_X8664_PERCPU__ */
#endif /* _ASM_X8664_PERCPU_H_ */
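The SMP half above hides the per-CPU offset addition inside an asm so the optimizer cannot hoist or merge the address computation across the relocation trick. Usage follows the generic DEFINE_PER_CPU/per_cpu/__get_cpu_var pattern; a brief sketch (the variable name is illustrative only):
DEFINE_PER_CPU(unsigned long, irq_events);	/* one counter per CPU */

static void count_event(void)
{
	__get_cpu_var(irq_events)++;		/* this CPU's copy, offset taken from the pda */
}

static unsigned long read_cpu_events(int cpu)
{
	return per_cpu(irq_events, cpu);	/* another CPU's copy, via __per_cpu_offset[] */
}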
......@@ -127,6 +127,8 @@ static inline void set_pml4(pml4_t *dst, pml4_t val)
#define MODULES_END 0xffffffffafffffff
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
#define IOMAP_START 0xfffffe8000000000
#define _PAGE_BIT_PRESENT 0
#define _PAGE_BIT_RW 1
#define _PAGE_BIT_USER 2
......@@ -169,6 +171,8 @@ static inline void set_pml4(pml4_t *dst, pml4_t val)
(_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED)
#define __PAGE_KERNEL_VSYSCALL \
(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED)
#define __PAGE_KERNEL_LARGE \
(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_PSE)
#define MAKE_GLOBAL(x) __pgprot((x) | _PAGE_GLOBAL)
......@@ -176,6 +180,7 @@ static inline void set_pml4(pml4_t *dst, pml4_t val)
#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL)
#define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE)
#define __P000 PAGE_NONE
#define __P001 PAGE_READONLY
......@@ -245,6 +250,12 @@ static inline int ptep_test_and_clear_young(pte_t *ptep) { return test_and_clea
static inline void ptep_set_wrprotect(pte_t *ptep) { clear_bit(_PAGE_BIT_RW, ptep); }
static inline void ptep_mkdirty(pte_t *ptep) { set_bit(_PAGE_BIT_DIRTY, ptep); }
#define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
static inline int pmd_large(pmd_t pte) {
return (pmd_val(pte) & __LARGE_PTE) == __LARGE_PTE;
}
/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
......
......@@ -28,6 +28,12 @@
#define VIP_MASK 0x00100000 /* virtual interrupt pending */
#define ID_MASK 0x00200000
#define desc_empty(desc) \
(!((desc)->a + (desc)->b))
#define desc_equal(desc1, desc2) \
(((desc1)->a == (desc2)->a) && ((desc1)->b == (desc2)->b))
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
......@@ -49,8 +55,8 @@ struct cpuinfo_x86 {
__u32 x86_capability[NCAPINTS];
char x86_vendor_id[16];
char x86_model_id[64];
int x86_cache_size; /* in KB - valid for CPUS which support this
call */
int x86_cache_size; /* in KB */
int x86_clflush_size;
unsigned long loops_per_jiffy;
} ____cacheline_aligned;
......@@ -315,10 +321,11 @@ struct thread_struct {
switch faster for a limited number of ioperm using tasks. -AK */
int ioperm;
u32 *io_bitmap_ptr;
/* cached TLS descriptors. */
u64 tls_array[GDT_ENTRY_TLS_ENTRIES];
};
#define INIT_THREAD { \
}
#define INIT_THREAD {}
#define INIT_MMAP \
{ &init_mm, 0, 0, NULL, PAGE_SHARED, VM_READ | VM_WRITE | VM_EXEC, 1, NULL, NULL }
......
#ifndef _ASM_X8664_PROTO_H
#define _ASM_X8664_PROTO_H 1
/* misc architecture specific prototypes */
struct cpuinfo_x86;
extern void get_cpu_vendor(struct cpuinfo_x86*);
extern void start_kernel(void);
extern void pda_init(int);
extern void mcheck_init(struct cpuinfo_x86 *c);
extern void init_memory_mapping(void);
extern void system_call(void);
extern void ia32_cstar_target(void);
extern void calibrate_delay(void);
extern void cpu_idle(void);
extern void sys_ni_syscall(void);
extern void config_acpi_tables(void);
extern void ia32_syscall(void);
extern void iommu_hole_init(void);
extern void do_softirq_thunk(void);
extern int setup_early_printk(char *);
extern void early_printk(const char *fmt, ...) __attribute__((format(printf,1,2)));
extern int k8_scan_nodes(unsigned long start, unsigned long end);
extern int numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
extern unsigned long numa_free_all_bootmem(void);
extern void reserve_bootmem_generic(unsigned long phys, unsigned len);
extern void free_bootmem_generic(unsigned long phys, unsigned len);
extern unsigned long start_pfn, end_pfn;
extern void show_stack(unsigned long * rsp);
extern void exception_table_check(void);
extern void acpi_boot_init(char *);
#define round_up(x,y) (((x) + (y) - 1) & ~((y)-1))
#define round_down(x,y) ((x) & ~((y)-1))
#endif
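round_up()/round_down() above only work when y is a power of two; a quick self-contained check of the arithmetic:
#define round_up(x,y) (((x) + (y) - 1) & ~((y)-1))
#define round_down(x,y) ((x) & ~((y)-1))

/* 0x1234 + 0xfff = 0x2233, masked with ~0xfff gives 0x2000; masking down gives 0x1000. */
_Static_assert(round_up(0x1234, 0x1000) == 0x2000, "next 4K boundary");
_Static_assert(round_down(0x1234, 0x1000) == 0x1000, "previous 4K boundary");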
......@@ -84,7 +84,6 @@ struct pt_regs {
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
#define user_mode(regs) (!!((regs)->cs & 3))
#define instruction_pointer(regs) ((regs)->rip)
extern void show_regs(struct pt_regs *);
void signal_fault(struct pt_regs *regs, void *frame, char *where);
enum {
......
#ifndef _X8664_RMAP_H
#define _X8664_RMAP_H
/* nothing to see, move along */
#include <asm-generic/rmap.h>
#endif
#ifndef _X86_64_RTC_H
#define _X86_64_RTC_H
/*
* x86 uses the default access methods for the RTC.
*/
#include <asm-generic/rtc.h>
#endif
......@@ -47,6 +47,7 @@ struct rwsem_waiter;
extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
/*
* the semaphore definition
......@@ -113,6 +114,31 @@ LOCK_PREFIX " incl (%%rdi)\n\t" /* adds 0x00000001, returns the old value
: "memory", "cc");
}
/*
* trylock for reading -- returns 1 if successful, 0 if contention
*/
static inline int __down_read_trylock(struct rw_semaphore *sem)
{
__s32 result, tmp;
__asm__ __volatile__(
"# beginning __down_read_trylock\n\t"
" movl %0,%1\n\t"
"1:\n\t"
" movl %1,%2\n\t"
" addl %3,%2\n\t"
" jle 2f\n\t"
LOCK_PREFIX " cmpxchgl %2,%0\n\t"
" jnz 1b\n\t"
"2:\n\t"
"# ending __down_read_trylock\n\t"
: "+m"(sem->count), "=&a"(result), "=&r"(tmp)
: "i"(RWSEM_ACTIVE_READ_BIAS)
: "memory", "cc");
return result>=0 ? 1 : 0;
}
/*
* lock for writing
*/
......@@ -138,6 +164,19 @@ LOCK_PREFIX " xaddl %0,(%%rdi)\n\t" /* subtract 0x0000ffff, returns the ol
: "memory", "cc");
}
/*
* trylock for writing -- returns 1 if successful, 0 if contention
*/
static inline int __down_write_trylock(struct rw_semaphore *sem)
{
signed long ret = cmpxchg(&sem->count,
RWSEM_UNLOCKED_VALUE,
RWSEM_ACTIVE_WRITE_BIAS);
if (ret == RWSEM_UNLOCKED_VALUE)
return 1;
return 0;
}
/*
* unlock after reading
*/
......@@ -157,7 +196,7 @@ LOCK_PREFIX " xaddl %[tmp],(%%rdi)\n\t" /* subtracts 1, returns the old va
" jmp 1b\n"
LOCK_SECTION_END
"# ending __up_read\n"
: "+m"(sem->count), "+r" (tmp)
: "+m"(sem->count), [tmp] "+r" (tmp)
: "D"(sem)
: "memory", "cc");
}
......@@ -170,7 +209,7 @@ static inline void __up_write(struct rw_semaphore *sem)
unsigned tmp;
__asm__ __volatile__(
"# beginning __up_write\n\t"
" movl %2,%[tmp]\n\t"
" movl %[bias],%[tmp]\n\t"
LOCK_PREFIX " xaddl %[tmp],(%%rdi)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
" jnz 2f\n\t" /* jump if the lock is being waited upon */
"1:\n\t"
......@@ -182,8 +221,29 @@ LOCK_PREFIX " xaddl %[tmp],(%%rdi)\n\t" /* tries to transition 0xffff0001 -
" jmp 1b\n"
LOCK_SECTION_END
"# ending __up_write\n"
: "+m"(sem->count), [tmp] "r" (tmp)
: "D"(sem), "i"(-RWSEM_ACTIVE_WRITE_BIAS)
: "+m"(sem->count), [tmp] "=r" (tmp)
: "D"(sem), [bias] "i"(-RWSEM_ACTIVE_WRITE_BIAS)
: "memory", "cc");
}
/*
* downgrade write lock to read lock
*/
static inline void __downgrade_write(struct rw_semaphore *sem)
{
__asm__ __volatile__(
"# beginning __downgrade_write\n\t"
LOCK_PREFIX " addl %[bias],(%%rdi)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */
" js 2f\n\t" /* jump if the lock is being waited upon */
"1:\n\t"
LOCK_SECTION_START("")
"2:\n\t"
" call rwsem_downgrade_thunk\n"
" jmp 1b\n"
LOCK_SECTION_END
"# ending __downgrade_write\n"
: "=m"(sem->count)
: "D"(sem), [bias] "i"(-RWSEM_WAITING_BIAS), "m"(sem->count)
: "memory", "cc");
}
......
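The asm above only makes sense against the 32-bit count encoding for readers, an active writer and queued sleepers. The constants themselves are not in this hunk; the values below follow the i386 rwsem of the period (treat them as an assumption), but they match the transitions the inline comments describe:
/*
 * RWSEM_UNLOCKED_VALUE     0x00000000   free
 * RWSEM_ACTIVE_READ_BIAS   0x00000001   each reader adds one to the low half
 * RWSEM_WAITING_BIAS       0xffff0000   (-0x00010000) sleepers are queued
 * RWSEM_ACTIVE_WRITE_BIAS  0xffff0001   waiting bias plus one active owner
 *
 * __down_write_trylock: cmpxchg 0x00000000 -> 0xffff0001, succeeds only if free.
 * __up_write:           xadd -0xffff0001; a nonzero result means more than our
 *                       own bias was in the count, i.e. someone is queued.
 * __downgrade_write:    add -RWSEM_WAITING_BIAS (= +0x00010000), turning
 *                       0xffff0001 ("one writer") into 0x00000001 ("one reader").
 */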
#ifndef _X8664_SECTIONS_H
#define _X8664_SECTIONS_H
/* nothing to see, move along */
#include <asm-generic/sections.h>
#endif
......@@ -18,4 +18,17 @@
#define __USER_CS 0x33 /* 6*8+3 */
#define __USER32_DS __USER_DS
#define GDT_ENTRY_TLS 1
#define GDT_ENTRY_TSS 8 /* needs two entries */
#define GDT_ENTRY_LDT 10
#define GDT_ENTRY_TLS_MIN 11
#define GDT_ENTRY_TLS_MAX 13
#define GDT_ENTRY_TLS_ENTRIES 3
#define IDT_ENTRIES 256
#define GDT_ENTRIES 16
#define GDT_SIZE (GDT_ENTRIES * 8)
#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
#endif
......@@ -111,7 +111,7 @@ asmlinkage void __up(struct semaphore * sem);
/*
* This is ugly, but we want the default case to fall through.
* "__down_failed" is a special asm handler that calls the C
* routine that actually waits. See arch/i386/kernel/semaphore.c
* routine that actually waits. See arch/x86_64/kernel/semaphore.c
*/
static inline void down(struct semaphore * sem)
{
......
......@@ -141,7 +141,6 @@ typedef void (*__sighandler_t)(int);
#define SIG_IGN ((__sighandler_t)1) /* ignore signal */
#define SIG_ERR ((__sighandler_t)-1) /* error return from signal */
#ifdef __KERNEL__
struct sigaction {
__sighandler_t sa_handler;
unsigned long sa_flags;
......@@ -152,7 +151,6 @@ struct sigaction {
struct k_sigaction {
struct sigaction sa;
};
#endif /* __KERNEL__ */
typedef struct sigaltstack {
void *ss_sp;
......
......@@ -7,13 +7,12 @@
#ifndef __ASSEMBLY__
#include <linux/config.h>
#include <linux/threads.h>
#include <linux/ptrace.h>
#include <linux/bitops.h>
#endif
#ifdef CONFIG_X86_LOCAL_APIC
#ifndef __ASSEMBLY__
#include <asm/fixmap.h>
#include <asm/bitops.h>
#include <asm/mpspec.h>
#ifdef CONFIG_X86_IO_APIC
#include <asm/io_apic.h>
......@@ -28,6 +27,8 @@
#include <asm/pda.h>
struct pt_regs;
/*
* Private routines/data
*/
......@@ -50,6 +51,11 @@ extern void zap_low_mappings (void);
* This simplifies scheduling and IPI sending and
* compresses data structures.
*/
extern volatile unsigned long cpu_callout_map;
#define cpu_possible(cpu) (cpu_callout_map & (1<<(cpu)))
extern inline int cpu_logical_map(int cpu)
{
return cpu;
......@@ -59,6 +65,34 @@ extern inline int cpu_number_map(int cpu)
return cpu;
}
extern inline unsigned int num_online_cpus(void)
{
return hweight32(cpu_online_map);
}
extern inline int find_next_cpu(unsigned cpu)
{
unsigned long left = cpu_online_map >> (cpu+1);
if (!left)
return -1;
return ffz(~left) + cpu + 1;
}
extern inline int find_first_cpu(void)
{
return ffz(~cpu_online_map);
}
#define for_each_cpu(i) \
for((i) = find_first_cpu(); (i)>=0; (i)=find_next_cpu(i))
extern volatile unsigned long cpu_callout_map;
/* We don't mark CPUs online until __cpu_up(), so we need another measure */
static inline int num_booting_cpus(void)
{
return hweight32(cpu_callout_map);
}
/*
* Some lowlevel functions might want to know about
* the real APIC ID <-> CPU # mapping.
......@@ -66,13 +100,6 @@ extern inline int cpu_number_map(int cpu)
extern volatile int x86_apicid_to_cpu[NR_CPUS];
extern volatile int x86_cpu_to_apicid[NR_CPUS];
/*
* General functions that each host system must provide.
*/
extern void smp_boot_cpus(void);
extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial udelay numbers) */
/*
* This function is needed by all SMP systems. It must _always_ be valid
* from the initial startup. We map APIC_BASE very early in page_setup(),
......@@ -81,12 +108,14 @@ extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial
#define smp_processor_id() read_pda(cpunumber)
extern __inline int hard_smp_processor_id(void)
{
/* we don't want to mark this access volatile - bad code generation */
return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
}
#define cpu_online(cpu) (cpu_online_map & (1<<(cpu)))
#endif /* !ASSEMBLY */
#define NO_PROC_ID 0xFF /* No processor magic marker */
......@@ -98,6 +127,8 @@ extern __inline int hard_smp_processor_id(void)
#ifndef CONFIG_SMP
#define stack_smp_processor_id() 0
#define for_each_cpu(x) (x)=0;
#define cpu_logical_map(x) (x)
#else
#include <asm/thread_info.h>
#define stack_smp_processor_id() \
......
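find_first_cpu()/find_next_cpu() above walk the online mask with ffz(~x), i.e. the index of the first set bit of x, and for_each_cpu() strings them together. A worked pass over a sparse mask:
/*
 * With cpu_online_map == 0b1101 (CPUs 0, 2 and 3 online):
 *
 *   find_first_cpu():  ffz(~0b1101) = 0                  -> CPU 0
 *   find_next_cpu(0):  left = 0b1101 >> 1 = 0b110, first set bit 1,
 *                      adding back the bits shifted out gives 0 + 1 + 1 = CPU 2
 *   find_next_cpu(2):  left = 0b1101 >> 3 = 0b1, first set bit 0 -> CPU 3
 *   find_next_cpu(3):  left = 0, so the walk ends with -1.
 */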
......@@ -3,7 +3,7 @@
#include <asm/sockios.h>
/* For setsockoptions(2) */
/* For setsockopt(2) */
#define SOL_SOCKET 1
#define SO_DEBUG 1
......
#ifndef __ASM_SOFTIRQ_H
#define __ASM_SOFTIRQ_H
#include <asm/atomic.h>
#include <linux/preempt.h>
#include <asm/hardirq.h>
#include <asm/pda.h>
#define __cpu_bh_enable() do { \
barrier(); sub_pda(__local_bh_count,1); preempt_enable(); } while (0)
#define cpu_bh_disable() do { \
preempt_disable(); add_pda(__local_bh_count,1); barrier(); } while (0)
#define local_bh_disable() \
do { preempt_count() += SOFTIRQ_OFFSET; barrier(); } while (0)
#define __local_bh_enable() \
do { barrier(); preempt_count() -= SOFTIRQ_OFFSET; } while (0)
#define local_bh_disable() cpu_bh_disable()
#define __local_bh_enable() __cpu_bh_enable()
#define in_softirq() (read_pda(__local_bh_count) != 0)
#define _local_bh_enable() do { \
asm volatile( \
"decl %%gs:%c1;" \
"jnz 1f;" \
"cmpl $0,%%gs:%c0;" \
"jnz 2f;" \
"1:;" \
".section .text.lock,\"ax\";" \
"2: call do_softirq_thunk;" \
"jmp 1b;" \
".previous" \
:: "i" (pda_offset(__softirq_pending)), \
"i" (pda_offset(__local_bh_count)) : \
"memory"); \
#define local_bh_enable() \
do { \
__local_bh_enable(); \
if (unlikely(!in_interrupt() && softirq_pending(smp_processor_id()))) \
do_softirq(); \
preempt_check_resched(); \
} while (0)
#define local_bh_enable() do { _local_bh_enable(); preempt_enable(); } while(0)
#endif /* __ASM_SOFTIRQ_H */
......@@ -46,13 +46,13 @@ typedef struct {
"\n1:\t" \
"lock ; decb %0\n\t" \
"js 2f\n" \
".section .text.lock,\"ax\"\n" \
LOCK_SECTION_START("") \
"2:\t" \
"cmpb $0,%0\n\t" \
"rep;nop\n\t" \
"jle 2b\n\t" \
"jmp 1b\n" \
".previous"
LOCK_SECTION_END
/*
* This works. Despite all the confusion.
......@@ -168,4 +168,6 @@ static inline int _raw_write_trylock(rwlock_t *lock)
return 0;
}
#define rwlock_is_locked(x) ((x)->lock != RW_LOCK_BIAS)
#endif /* __ASM_SPINLOCK_H */
......@@ -39,6 +39,7 @@
__POP(rax) __POP(r15) __POP(r14) __POP(r13) __POP(r12) __POP(r11) __POP(r10) \
__POP(r9) __POP(r8)
/* RED-PEN: pipeline stall on ret because it is not predicted */
#define switch_to(prev,next,last) \
asm volatile(SAVE_CONTEXT \
"movq %%rsp,%[prevrsp]\n\t" \
......@@ -72,7 +73,7 @@ extern void load_gs_index(unsigned);
"jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n\t" \
".align 4\n\t" \
".align 8\n\t" \
".quad 1b,3b\n" \
".previous" \
: :"r" ((int)(value)))
......@@ -241,33 +242,15 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
#define local_irq_enable() __asm__ __volatile__("sti": : :"memory")
/* used in the idle loop; sti takes one instruction cycle to complete */
#define safe_halt() __asm__ __volatile__("sti; hlt": : :"memory")
#define irqs_disabled() \
({ \
unsigned long flags; \
local_save_flags(flags); \
!(flags & (1<<9)); \
})
/* For spinlocks etc */
#define local_irq_save(x) do { warn_if_not_ulong(x); __asm__ __volatile__("# local_irq_save \n\t pushfq ; popq %0 ; cli":"=g" (x): /* no input */ :"memory"); } while (0)
#define local_irq_restore(x) __asm__ __volatile__("# local_irq_restore \n\t pushq %0 ; popfq": /* no output */ :"g" (x):"memory")
#ifdef CONFIG_SMP
extern void __global_cli(void);
extern void __global_sti(void);
extern unsigned long __global_save_flags(void);
extern void __global_restore_flags(unsigned long);
#define cli() __global_cli()
#define sti() __global_sti()
#define save_flags(x) ((x)=__global_save_flags())
#define restore_flags(x) __global_restore_flags(x)
#else
#define cli() local_irq_disable()
#define sti() local_irq_enable()
#define save_flags(x) local_save_flags(x)
#define restore_flags(x) local_irq_restore(x)
#endif
/* Default simics "magic" breakpoint */
#define icebp() asm volatile("xchg %%bx,%%bx" ::: "ebx")
/*
* disable hlt during certain critical i/o operations
......
......@@ -35,15 +35,17 @@ struct thread_info {
/*
* macros/functions for gaining access to the thread information structure
* preempt_count needs to be 1 initially, until the scheduler is functional.
*/
#ifndef __ASSEMBLY__
#define INIT_THREAD_INFO(tsk) \
{ \
task: &tsk, \
exec_domain: &default_exec_domain, \
flags: 0, \
cpu: 0, \
addr_limit: KERNEL_DS, \
.task = &tsk, \
.exec_domain = &default_exec_domain, \
.flags = 0, \
.cpu = 0, \
.preempt_count = 1, \
.addr_limit = KERNEL_DS, \
}
#define init_thread_info (init_thread_union.thread_info)
......
......@@ -42,6 +42,11 @@ typedef unsigned long long u64;
typedef u64 dma64_addr_t;
typedef u64 dma_addr_t;
#ifdef CONFIG_LBD
typedef u64 sector_t;
#define HAVE_SECTOR_T
#endif
#endif /* __KERNEL__ */
#endif
......@@ -468,10 +468,22 @@ __SYSCALL(__NR_futex, sys_futex)
__SYSCALL(__NR_sched_setaffinity, sys_sched_setaffinity)
#define __NR_sched_getaffinity 204
__SYSCALL(__NR_sched_getaffinity, sys_sched_getaffinity)
#define __NR_syscall_max __NR_sched_getaffinity
#define __NR_set_thread_area 205
__SYSCALL(__NR_set_thread_area, sys_set_thread_area)
#define __NR_io_setup 206
__SYSCALL(__NR_io_setup, sys_io_setup)
#define __NR_io_destroy 207
__SYSCALL(__NR_io_destroy, sys_io_destroy)
#define __NR_io_getevents 208
__SYSCALL(__NR_io_getevents, sys_io_getevents)
#define __NR_io_submit 209
__SYSCALL(__NR_io_submit, sys_io_submit)
#define __NR_io_cancel 210
__SYSCALL(__NR_io_cancel, sys_io_cancel)
#define __NR_get_thread_area 211
__SYSCALL(__NR_get_thread_area, sys_get_thread_area)
#define __NR_syscall_max __NR_get_thread_area
#ifndef __NO_STUBS
/* user-visible error numbers are in the range -1 - -4095 */
......@@ -529,7 +541,7 @@ long __res; \
__asm__ volatile (__syscall \
: "=a" (__res) \
: "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)), \
"d" ((long)(arg3)) : __syscall_clobber, "r9" ); \
"d" ((long)(arg3)) : __syscall_clobber); \
__syscall_return(type,__res); \
}
......@@ -549,11 +561,25 @@ __syscall_return(type,__res); \
type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5) \
{ \
long __res; \
__asm__ volatile ("movq %5,%%r10 ; movq %6,%%r9 ; " __syscall \
__asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; " __syscall \
: "=a" (__res) \
: "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)), \
"d" ((long)(arg3)),"g" ((long)(arg4)),"g" ((long)(arg5)) : \
__syscall_clobber,"r8","r9","r10" ); \
__syscall_clobber,"r8","r10" ); \
__syscall_return(type,__res); \
}
#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
type5,arg5,type6,arg6) \
type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5,type6 arg6) \
{ \
long __res; \
__asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; movq %7,%%r9" __syscall \
: "=a" (__res) \
: "0" (__NR_##name),"D" ((long)(arg1)),"S" ((long)(arg2)), \
"d" ((long)(arg3)),"g" ((long)(arg4)),"g" ((long)(arg5), \
"g" ((long)(arg6),) : \
__syscall_clobber,"r8","r10","r9" ); \
__syscall_return(type,__res); \
}
......
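The _syscallN stubs above are the kernel-internal way to issue a system call without libc: %rax carries the number, %rdi/%rsi/%rdx the first three arguments, and the extra movq's route arguments four to six through %r10, %r8 and %r9 as the x86-64 syscall ABI requires (the syscall instruction itself clobbers %rcx and %r11). A usage sketch:
/* Defines: long write(int fd, const void *buf, unsigned long count) -- it loads
 * __NR_write into %rax, fd/buf/count into %rdi/%rsi/%rdx, executes "syscall",
 * and maps -1..-4095 results to errno through __syscall_return(). */
static _syscall3(long, write, int, fd, const void *, buf, unsigned long, count)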
......@@ -3,7 +3,6 @@
#include <asm/types.h>
#include <asm/page.h>
#include <linux/ptrace.h>
/* Core file format: The core file is written in such a way that gdb
can understand it and provide useful information to the user.
There are quite a number of obstacles to being able to view the
......
......@@ -46,7 +46,7 @@ typedef struct { unsigned long a,b; } __attribute__((aligned(16))) xmm_store_t;
"movups %%xmm1,0x10(%1) ;\n\t" \
"movups %%xmm2,0x20(%1) ;\n\t" \
"movups %%xmm3,0x30(%1) ;\n\t" \
: "=r" (cr0) \
: "=&r" (cr0) \
: "r" (xmm_save) \
: "memory"); \
} while(0)
......@@ -335,11 +335,11 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
}
static struct xor_block_template xor_block_sse = {
name: "generic_sse",
do_2: xor_sse_2,
do_3: xor_sse_3,
do_4: xor_sse_4,
do_5: xor_sse_5,
.name = "generic_sse",
.do_2 = xor_sse_2,
.do_3 = xor_sse_3,
.do_4 = xor_sse_4,
.do_5 = xor_sse_5,
};
#undef XOR_TRY_TEMPLATES
......