Commit b4f6270e authored by Andi Kleen, committed by Linus Torvalds

[PATCH] x86-64 updates

Lots of x86-64 updates. Merged with 2.4, and NUMA works now. Also reenabled
the preemptible kernel, plus some other bug fixes.
The IOMMU is disabled by default for now because it still has problems.

 - Add more CONFIG options for device driver debugging and iommu
   force/debug.  (don't enable iommu force currently)
 - Some S3/ACPI fixes/cleanups from Pavel.
 - Set MSG_COMPAT_* in msg_flags for networking 32bit emulation.
   This unfortunately still doesn't fix the fd passing problems.
 - Sync PCI IOMMU code with 2.4 (minor fixes, flush less often)
 - Really fix UP compilation (Pavel)
 - Reenable preempt
 - Fix CONFIG_DISCONTIGMEM bootup and enable.  Still needs more tuning.
 - Fix some bugs in topology discovery and clean code up.
 - Don't put unwind tables into object files
 - Some kernel debugging hooks
 - Move CPU detection into early real mode code to better interact with
   vesafb consoles
 - Initialize mode in real mode character output
 - New 32bit FPU signal save/restore
 - Various fixes in FPU handling in ptrace
 - Fix security holes in ptrace (32bit and 64bit)
 - Fix serial ioctl (including security hole)
 - Add bluetooth ioctls to 32bit emu (from sparc64)
 - Correctly enable si_val in queued signals in 32bit emulation
 - Rework SEM_STAT emulation.  LTP still fails unfortunately.
 - Fix error case in msg* emulation
 - Fix debug register access from ptrace (Michal Ludvig, me)
 - Fix handling of NULL arguments in 32bit execve
 - Fix some error cases for 32bit readv/writev (LTP still complains)
 - Remove rate control from unimplemented syscall warnings
 - Fix error message for missing aperture
 - Turn some APIC printks into Dprintk to make the bootup more quiet
 - Some fixes for no APIC (probably still broken), add disableapic
   option (untested)
 - Sync K8 MCE handler with 2.4.  Should work a lot better now.
 - Remove never used KDB hooks
 - Fix buffer overflow in command line copying
 - Merge from i386: use separate status word for lazy FPU state
 - Don't force the IOMMU for dma masks < 4GB.
 - Print backtrace in Sysrq-T (from Andrea)
 - Merge from i386: fix FPU race in fork.
 - Disable NX mode by default for now
 - Rewrite dump_pagetable
 - Fix off by one bug in ioremap (i386 merge)
 - Merge from i386: handle VIA pci bridge bugs
 - Disable NUMA ACPI support (no SRAT support yet)
 - Fix aio 32bit emulation
 - Increase 32bit address space to nearly 4GB
 - Add exit_group syscall (see the sketch after this list)
 - Fix TLS setting in clone (Ulrich Drepper)
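For context on the exit_group item above: exit_group terminates every thread
in the process at once, where the old exit syscall only ended the calling
thread. A minimal user-space sketch (illustrative only; assumes a libc
without a wrapper yet, so it goes through syscall(2), and 231 is the
x86-64 syscall number):

    #define _GNU_SOURCE
    #include <unistd.h>

    int main(void)
    {
            /* terminate all threads with exit status 0;
               231 is __NR_exit_group on x86-64 */
            syscall(231, 0);
            return 0;       /* never reached */
    }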
parent 5563e77b
......@@ -179,9 +179,7 @@ config SMP
If you don't know what to do here, say N.
# broken currently
config PREEMPT
depends on NOT_WORKING
bool "Preemptible Kernel"
---help---
This option reduces the latency of the kernel when reacting to
......@@ -200,7 +198,7 @@ config PREEMPT
# someone write a better help text please.
config K8_NUMA
bool "K8 NUMA support"
depends on SMP && NOT_WORKING
depends on SMP
help
Enable NUMA (Non-Uniform Memory Access) support for
AMD Opteron Multiprocessor systems. The kernel will try to allocate
......@@ -590,10 +588,8 @@ config DEBUG_SLAB
allocation as well as poisoning memory on free to catch use of freed
memory.
# bool ' Memory mapped I/O debugging' CONFIG_DEBUG_IOVIRT
config MAGIC_SYSRQ
bool "Magic SysRq key"
depends on DEBUG_KERNEL
help
If you say Y here, you will have some control over the system even
if the system crashes for example during kernel debugging (e.g., you
......@@ -639,14 +635,37 @@ config KALLSYMS
config FRAME_POINTER
bool "Compile the kernel with frame pointers"
depends on DEBUG_KERNEL
help
Compile the kernel with frame pointers. This may help for some
debugging with external debuggers. Note the standard oops backtracer
doesn't make use of it and the x86-64 kernel doesn't ensure a consistent
doesn't make use of this and the x86-64 kernel doesn't ensure a consistent
frame pointer through inline assembly (semaphores etc.)
Normally you should say N.
config IOMMU_DEBUG
bool "Force IOMMU to on"
help
Force the IOMMU to on even when you have less than 4GB of memory and add
debugging code.
Can be disabled at boot time with iommu=noforce.
config IOMMU_LEAK
bool "IOMMU leak tracing"
depends on DEBUG_KERNEL
help
Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings.
config MCE_DEBUG
bool "K8 Machine check debugging mode"
default y
help
Turn on all Machine Check debugging for device driver problems.
This can cause panics, but is useful for finding device driver bugs.
#config X86_REMOTE_DEBUG
# bool "kgdb debugging stub"
endmenu
source "security/Kconfig"
......
......@@ -47,6 +47,10 @@ CFLAGS += -fno-reorder-blocks
# should lower this a lot and see how much .text it saves
CFLAGS += -finline-limit=2000
#CFLAGS += -g
# don't enable this when you use kgdb:
ifneq ($(CONFIG_X86_REMOTE_DEBUG),y)
CFLAGS += -fno-asynchronous-unwind-tables
endif
head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
......
......@@ -274,7 +274,7 @@ static void error(char *x)
puts(x);
puts("\n\n -- System halted");
while(1); /* Halt */
while(1);
}
void setup_normal_output_buffer(void)
......@@ -429,8 +429,6 @@ int decompress_kernel(struct moveparams *mv, void *rmode)
else setup_output_buffer_if_we_run_high(mv);
makecrc();
puts("Checking CPU type...");
check_cpu();
puts(".\nDecompressing Linux...");
gunzip();
puts("done.\nBooting the kernel.\n");
......
......@@ -42,6 +42,7 @@
* if CX/DX have been changed in the e801 call and if so use AX/BX.
* Michael Miller, April 2001 <michaelm@mjmm.org>
*
* Added long mode checking and SSE force. March 2003, Andi Kleen.
*/
#include <linux/config.h>
......@@ -200,10 +201,10 @@ fin: ret
prtsp2: call prtspc # Print double space
prtspc: movb $0x20, %al # Print single space (note: fall-thru)
# Part of above routine, this one just prints ascii al
prtchr: pushw %ax
prtchr:
pushw %ax
pushw %cx
xorb %bh, %bh
movw $0007,%bx
movw $0x01, %cx
movb $0x0e, %ah
int $0x10
......@@ -280,6 +281,75 @@ good_sig:
loader_panic_mess: .string "Wrong loader, giving up..."
loader_ok:
/* check for long mode. */
/* we have to do this before the VESA setup, otherwise the user
can't see the error message. */
pushw %ds
movw %cs,%ax
movw %ax,%ds
/* minimum CPUID flags for x86-64 */
/* see http://www.x86-64.org/lists/discuss/msg02971.html */
#define SSE_MASK ((1<<25)|(1<<26))
#define REQUIRED_MASK1 ((1<<0)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<8)|(1<<11)| \
(1<<13)|(1<<15)|(1<<24)|(1<<29))
pushfl /* standard way to check for cpuid */
popl %eax
movl %eax,%ebx
xorl $0x200000,%eax
pushl %eax
popfl
pushfl
popl %eax
cmpl %eax,%ebx
jz no_longmode /* cpu has no cpuid */
movl $0x80000000,%eax
cpuid
cmpl $0x80000001,%eax
jb no_longmode /* no extended cpuid */
xor %di,%di
cmpl $0x68747541,%ebx /* AuthenticAMD */
jnz noamd
cmpl $0x69746e65,%edx
jnz noamd
cmpl $0x444d4163,%ecx
jnz noamd
mov $1,%di /* cpu is from AMD */
noamd:
movl $0x80000001,%eax
cpuid
andl $REQUIRED_MASK1,%edx
xorl $REQUIRED_MASK1,%edx
jnz no_longmode
sse_test:
movl $1,%eax
cpuid
andl $SSE_MASK,%edx
cmpl $SSE_MASK,%edx
je sse_ok
test %di,%di
jz no_longmode /* only try to force SSE on AMD */
movl $0xc0010015,%ecx /* HWCR */
rdmsr
btr $15,%eax /* enable SSE */
wrmsr
xor %di,%di /* don't loop */
jmp sse_test /* try again */
no_longmode:
call beep
lea long_mode_panic,%si
call prtstr
no_longmode_loop:
jmp no_longmode_loop
long_mode_panic:
.string "Your CPU does not support long mode. Use a 32bit distribution."
.byte 0
sse_ok:
popw %ds
# Get memory size (extended mem, kB)
xorl %eax, %eax
......
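A rough C equivalent of the long-mode test in the assembly above, for readers
not fluent in real-mode code (illustrative sketch only; the kernel has to do
this in 16-bit code before VESA setup so the error message stays visible):

    #include <stdint.h>

    static void cpuid(uint32_t op, uint32_t *a, uint32_t *b,
                      uint32_t *c, uint32_t *d)
    {
            __asm__ volatile("cpuid"
                    : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
                    : "a" (op));
    }

    /* 1 if the CPU advertises long mode (EDX bit 29 of leaf 0x80000001) */
    static int has_long_mode(void)
    {
            uint32_t a, b, c, d;

            cpuid(0x80000000, &a, &b, &c, &d);
            if (a < 0x80000001)
                    return 0;       /* no extended CPUID leaves */
            cpuid(0x80000001, &a, &b, &c, &d);
            return (d >> 29) & 1;   /* LM bit, part of REQUIRED_MASK1 */
    }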
......@@ -77,17 +77,20 @@ static inline int convert_fxsr_from_user(struct i387_fxsave_struct *fxsave,
struct _fpxreg *to;
struct _fpreg *from;
int i;
int err;
__u32 v;
u32 v;
int err = 0;
err = __get_user(fxsave->cwd, &buf->cw);
err |= __get_user(fxsave->swd, &buf->sw);
err |= __get_user(fxsave->twd, &buf->tag);
#define G(num,val) err |= __get_user(val, num + (u32 *)buf)
G(0, fxsave->cwd);
G(1, fxsave->swd);
G(2, fxsave->twd);
fxsave->twd = twd_i387_to_fxsr(fxsave->twd);
err |= __get_user(fxsave->rip, &buf->ipoff);
err |= __get_user(fxsave->rdp, &buf->dataoff);
err |= __get_user(v, &buf->cssel);
fxsave->fop = v >> 16;
G(3, fxsave->rip);
G(4, v);
fxsave->fop = v>>16; /* cs ignored */
G(5, fxsave->rdp);
/* 6: ds ignored */
#undef G
if (err)
return -1;
......@@ -109,21 +112,29 @@ static inline int convert_fxsr_to_user(struct _fpstate_ia32 *buf,
struct _fpreg *to;
struct _fpxreg *from;
int i;
u32 ds;
int err;
err = __put_user((unsigned long)fxsave->cwd | 0xffff0000, &buf->cw);
err |= __put_user((unsigned long)fxsave->swd | 0xffff0000, &buf->sw);
err |= __put_user((u32)fxsave->rip, &buf->ipoff);
err |= __put_user((u32)(regs->cs | ((u32)fxsave->fop << 16)),
&buf->cssel);
err |= __put_user((u32)twd_fxsr_to_i387(fxsave), &buf->tag);
err |= __put_user((u32)fxsave->rdp, &buf->dataoff);
if (tsk == current)
asm("movl %%ds,%0 " : "=r" (ds));
else /* ptrace. task has stopped. */
u16 cs,ds;
int err = 0;
if (tsk == current) {
/* should be actually ds/cs at fpu exception time,
but that information is not available in 64bit mode. */
asm("movw %%ds,%0 " : "=r" (ds));
asm("movw %%cs,%0 " : "=r" (cs));
} else { /* ptrace. task has stopped. */
ds = tsk->thread.ds;
err |= __put_user(ds, &buf->datasel);
cs = regs->cs;
}
#define P(num,val) err |= __put_user(val, num + (u32 *)buf)
P(0, (u32)fxsave->cwd | 0xffff0000);
P(1, (u32)fxsave->swd | 0xffff0000);
P(2, twd_fxsr_to_i387(fxsave));
P(3, (u32)fxsave->rip);
P(4, cs | ((u32)fxsave->fop) << 16);
P(5, fxsave->rdp);
P(6, 0xffff0000 | ds);
#undef P
if (err)
return -1;
......@@ -144,9 +155,9 @@ int restore_i387_ia32(struct task_struct *tsk, struct _fpstate_ia32 *buf, int fs
&buf->_fxsr_env[0],
sizeof(struct i387_fxsave_struct)))
return -1;
}
tsk->thread.i387.fxsave.mxcsr &= 0xffbf;
current->used_math = 1;
tsk->used_math = 1;
}
return convert_fxsr_from_user(&tsk->thread.i387.fxsave, buf);
}
......@@ -157,12 +168,11 @@ int save_i387_ia32(struct task_struct *tsk,
{
int err = 0;
if (!tsk->used_math)
return 0;
tsk->used_math = 0;
unlazy_fpu(tsk);
init_fpu(tsk);
if (convert_fxsr_to_user(buf, &tsk->thread.i387.fxsave, regs, tsk))
return -1;
if (fsave)
return 0;
err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status);
if (fsave)
return err ? -1 : 1;
......
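The G()/P() macros in the conversion above exploit the fact that the ia32
_fpstate header is seven consecutive 32-bit words; a sketch of the layout
(field names as in the i387 FSAVE environment):

    /* u32 index:  0    1    2     3      4       5        6
     * field:      cw   sw   tag   ipoff  cssel   dataoff  datasel
     * so e.g. G(4, v) above is just __get_user(v, 4 + (u32 *)buf)
     */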
......@@ -39,6 +39,7 @@
#include <linux/cdrom.h>
#include <linux/loop.h>
#include <linux/auto_fs.h>
#include <linux/auto_fs4.h>
#include <linux/devfs_fs.h>
#include <linux/tty.h>
#include <linux/vt_kern.h>
......@@ -60,6 +61,8 @@
#include <linux/if_tun.h>
#include <linux/dirent.h>
#include <linux/ctype.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/rfcomm.h>
#if defined(CONFIG_BLK_DEV_LVM) || defined(CONFIG_BLK_DEV_LVM_MODULE)
/* Ugh. This header really is not clean */
#define min min
......@@ -2906,35 +2909,28 @@ static int serial_struct_ioctl(unsigned fd, unsigned cmd, void *ptr)
{
typedef struct serial_struct SS;
struct serial_struct32 *ss32 = ptr;
int err = 0;
int err;
struct serial_struct ss;
mm_segment_t oldseg = get_fs();
set_fs(KERNEL_DS);
if (cmd == TIOCSSERIAL) {
err = -EFAULT;
if (copy_from_user(&ss, ss32, sizeof(struct serial_struct32)))
goto out;
return -EFAULT;
memmove(&ss.iomem_reg_shift, ((char*)&ss.iomem_base)+4,
sizeof(SS)-offsetof(SS,iomem_reg_shift));
ss.iomem_base = (void *)((unsigned long)ss.iomem_base & 0xffffffff);
}
if (!err)
set_fs(KERNEL_DS);
err = sys_ioctl(fd,cmd,(unsigned long)(&ss));
set_fs(oldseg);
if (cmd == TIOCGSERIAL && err >= 0) {
__u32 base;
if (__copy_to_user(ss32,&ss,offsetof(SS,iomem_base)) ||
__copy_to_user(&ss32->iomem_reg_shift,
&ss.iomem_reg_shift,
sizeof(SS) - offsetof(SS, iomem_reg_shift)))
err = -EFAULT;
if (ss.iomem_base > (unsigned char *)0xffffffff)
base = -1;
else
base = (unsigned long)ss.iomem_base;
err |= __put_user(base, &ss32->iomem_base);
__put_user((unsigned long)ss.iomem_base >> 32 ?
0xffffffff : (unsigned)(unsigned long)ss.iomem_base,
&ss32->iomem_base) ||
__put_user(ss.iomem_reg_shift, &ss32->iomem_reg_shift) ||
__put_user(ss.port_high, &ss32->port_high))
return -EFAULT;
}
out:
set_fs(oldseg);
return err;
}
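The shuffling in serial_struct_ioctl above is needed because struct
serial_struct embeds a pointer (iomem_base), so every field after it sits
4 bytes earlier in the 32-bit layout. A hypothetical, trimmed-down sketch
of the mismatch (not the real declarations):

    struct serial_struct32 {
            /* ...leading int fields, identical on both ABIs... */
            unsigned int   iomem_base;      /* 32-bit pointer */
            unsigned short iomem_reg_shift; /* 4 bytes earlier than on x86-64 */
            unsigned int   port_high;
    };
    struct serial_struct {
            /* ... */
            unsigned char *iomem_base;      /* 64-bit pointer, 8-byte aligned */
            unsigned short iomem_reg_shift;
            unsigned int   port_high;
    };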
......@@ -3045,7 +3041,14 @@ static int do_blkgetsize64(unsigned int fd, unsigned int cmd,
return sys_ioctl(fd, BLKGETSIZE64, arg);
}
/* Bluetooth ioctls */
#define HCIUARTSETPROTO _IOW('U', 200, int)
#define HCIUARTGETPROTO _IOR('U', 201, int)
#define BNEPCONNADD _IOW('B', 200, int)
#define BNEPCONNDEL _IOW('B', 201, int)
#define BNEPGETCONNLIST _IOR('B', 210, int)
#define BNEPGETCONNINFO _IOR('B', 211, int)
struct usbdevfs_ctrltransfer32 {
__u8 bRequestType;
......@@ -4093,6 +4096,7 @@ COMPATIBLE_IOCTL(AUTOFS_IOC_FAIL)
COMPATIBLE_IOCTL(AUTOFS_IOC_CATATONIC)
COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI)
/* DEVFS */
COMPATIBLE_IOCTL(DEVFSDIOC_GET_PROTO_REV)
COMPATIBLE_IOCTL(DEVFSDIOC_SET_EVENT_MASK)
......@@ -4200,6 +4204,17 @@ COMPATIBLE_IOCTL(HCISETLINKMODE)
COMPATIBLE_IOCTL(HCISETACLMTU)
COMPATIBLE_IOCTL(HCISETSCOMTU)
COMPATIBLE_IOCTL(HCIINQUIRY)
COMPATIBLE_IOCTL(HCIUARTSETPROTO)
COMPATIBLE_IOCTL(HCIUARTGETPROTO)
COMPATIBLE_IOCTL(RFCOMMCREATEDEV)
COMPATIBLE_IOCTL(RFCOMMRELEASEDEV)
COMPATIBLE_IOCTL(RFCOMMGETDEVLIST)
COMPATIBLE_IOCTL(RFCOMMGETDEVINFO)
COMPATIBLE_IOCTL(RFCOMMSTEALDLC)
COMPATIBLE_IOCTL(BNEPCONNADD)
COMPATIBLE_IOCTL(BNEPCONNDEL)
COMPATIBLE_IOCTL(BNEPGETCONNLIST)
COMPATIBLE_IOCTL(BNEPGETCONNINFO)
/* Misc. */
COMPATIBLE_IOCTL(0x41545900) /* ATYIO_CLKR */
COMPATIBLE_IOCTL(0x41545901) /* ATYIO_CLKW */
......
......@@ -47,9 +47,16 @@ static int ia32_copy_siginfo_to_user(siginfo_t32 *to, siginfo_t *from)
{
if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t)))
return -EFAULT;
if (from->si_code < 0)
return __copy_to_user(to, from, sizeof(siginfo_t));
else {
if (from->si_code < 0) {
/* the only field that differs is the alignment of the
pointer in sigval_t. Move everything from si_int down
by 4 bytes, including padding. */
memmove(&((siginfo_t32 *)from)->si_int,
&from->si_int,
sizeof(siginfo_t) - offsetof(siginfo_t, si_int));
/* last 4 bytes stay the same */
return __copy_to_user(to, from, sizeof(siginfo_t32));
} else {
int err;
/* If you change siginfo_t structure, please be sure
......@@ -59,7 +66,7 @@ static int ia32_copy_siginfo_to_user(siginfo_t32 *to, siginfo_t *from)
3 ints plus the relevant union member. */
err = __put_user(from->si_signo, &to->si_signo);
err |= __put_user(from->si_errno, &to->si_errno);
err |= __put_user((short)from->si_code, &to->si_code);
err |= __put_user(from->si_code, &to->si_code);
/* First 32bits of unions are always present. */
err |= __put_user(from->si_pid, &to->si_pid);
switch (from->si_code >> 16) {
......@@ -108,6 +115,7 @@ sys32_sigaltstack(const stack_ia32_t *uss_ptr, stack_ia32_t *uoss_ptr,
mm_segment_t seg;
if (uss_ptr) {
u32 ptr;
memset(&uss,0,sizeof(stack_t));
if (!access_ok(VERIFY_READ,uss_ptr,sizeof(stack_ia32_t)) ||
__get_user(ptr, &uss_ptr->ss_sp) ||
__get_user(uss.ss_flags, &uss_ptr->ss_flags) ||
......@@ -340,8 +348,11 @@ ia32_setup_sigcontext(struct sigcontext_ia32 *sc, struct _fpstate_ia32 *fpstate,
tmp = save_i387_ia32(current, fpstate, regs, 0);
if (tmp < 0)
err = -EFAULT;
else
else {
current->used_math = 0;
stts();
err |= __put_user((u32)(u64)(tmp ? fpstate : NULL), &sc->fpstate);
}
/* non-iBCS2 extensions.. */
err |= __put_user(mask, &sc->oldmask);
......
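The memmove in ia32_copy_siginfo_to_user above exists because sigval_t
contains a pointer: on x86-64 the union is 8-byte aligned, so si_int lands
4 bytes later than in the ia32 layout. A hypothetical minimal sketch:

    /* hypothetical trimmed-down layouts, for illustration */
    struct si64 { int signo, err, code; union { int si_int; void *p;    } v; };
    struct si32 { int signo, err, code; union { int si_int; unsigned p; } v; };
    /* offsetof(struct si64, v) == 16 (padded for the 8-byte pointer),
       offsetof(struct si32, v) == 12, hence the 4-byte memmove */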
......@@ -187,12 +187,58 @@ ipc_parse_version32 (int *cmd)
}
}
static int put_semid(void *user_semid, struct semid64_ds *s, int version)
{
int err2;
switch (version) {
case IPC_64: {
struct semid64_ds32 *usp64 = (struct semid64_ds32 *) user_semid;
if (!access_ok(VERIFY_WRITE, usp64, sizeof(*usp64))) {
err2 = -EFAULT;
break;
}
err2 = __put_user(s->sem_perm.key, &usp64->sem_perm.key);
err2 |= __put_user(s->sem_perm.uid, &usp64->sem_perm.uid);
err2 |= __put_user(s->sem_perm.gid, &usp64->sem_perm.gid);
err2 |= __put_user(s->sem_perm.cuid, &usp64->sem_perm.cuid);
err2 |= __put_user(s->sem_perm.cgid, &usp64->sem_perm.cgid);
err2 |= __put_user(s->sem_perm.mode, &usp64->sem_perm.mode);
err2 |= __put_user(s->sem_perm.seq, &usp64->sem_perm.seq);
err2 |= __put_user(s->sem_otime, &usp64->sem_otime);
err2 |= __put_user(s->sem_ctime, &usp64->sem_ctime);
err2 |= __put_user(s->sem_nsems, &usp64->sem_nsems);
break;
}
default: {
struct semid_ds32 *usp32 = (struct semid_ds32 *) user_semid;
if (!access_ok(VERIFY_WRITE, usp32, sizeof(*usp32))) {
err2 = -EFAULT;
break;
}
err2 = __put_user(s->sem_perm.key, &usp32->sem_perm.key);
err2 |= __put_user(s->sem_perm.uid, &usp32->sem_perm.uid);
err2 |= __put_user(s->sem_perm.gid, &usp32->sem_perm.gid);
err2 |= __put_user(s->sem_perm.cuid, &usp32->sem_perm.cuid);
err2 |= __put_user(s->sem_perm.cgid, &usp32->sem_perm.cgid);
err2 |= __put_user(s->sem_perm.mode, &usp32->sem_perm.mode);
err2 |= __put_user(s->sem_perm.seq, &usp32->sem_perm.seq);
err2 |= __put_user(s->sem_otime, &usp32->sem_otime);
err2 |= __put_user(s->sem_ctime, &usp32->sem_ctime);
err2 |= __put_user(s->sem_nsems, &usp32->sem_nsems);
break;
}
}
return err2;
}
static int
semctl32 (int first, int second, int third, void *uptr)
{
union semun fourth;
u32 pad;
int err = 0, err2;
int err;
struct semid64_ds s;
mm_segment_t old_fs;
int version = ipc_parse_version32(&third);
......@@ -225,46 +271,10 @@ semctl32 (int first, int second, int third, void *uptr)
fourth.__pad = &s;
old_fs = get_fs();
set_fs(KERNEL_DS);
err = sys_semctl(first, second|IPC_64, third, fourth);
err = sys_semctl(first, second, third|IPC_64, fourth);
set_fs(old_fs);
if (version == IPC_64) {
struct semid64_ds32 *usp64 = (struct semid64_ds32 *) A(pad);
if (!access_ok(VERIFY_WRITE, usp64, sizeof(*usp64))) {
err = -EFAULT;
break;
}
err2 = __put_user(s.sem_perm.key, &usp64->sem_perm.key);
err2 |= __put_user(s.sem_perm.uid, &usp64->sem_perm.uid);
err2 |= __put_user(s.sem_perm.gid, &usp64->sem_perm.gid);
err2 |= __put_user(s.sem_perm.cuid, &usp64->sem_perm.cuid);
err2 |= __put_user(s.sem_perm.cgid, &usp64->sem_perm.cgid);
err2 |= __put_user(s.sem_perm.mode, &usp64->sem_perm.mode);
err2 |= __put_user(s.sem_perm.seq, &usp64->sem_perm.seq);
err2 |= __put_user(s.sem_otime, &usp64->sem_otime);
err2 |= __put_user(s.sem_ctime, &usp64->sem_ctime);
err2 |= __put_user(s.sem_nsems, &usp64->sem_nsems);
} else {
struct semid_ds32 *usp32 = (struct semid_ds32 *) A(pad);
if (!access_ok(VERIFY_WRITE, usp32, sizeof(*usp32))) {
err = -EFAULT;
break;
}
err2 = __put_user(s.sem_perm.key, &usp32->sem_perm.key);
err2 |= __put_user(s.sem_perm.uid, &usp32->sem_perm.uid);
err2 |= __put_user(s.sem_perm.gid, &usp32->sem_perm.gid);
err2 |= __put_user(s.sem_perm.cuid, &usp32->sem_perm.cuid);
err2 |= __put_user(s.sem_perm.cgid, &usp32->sem_perm.cgid);
err2 |= __put_user(s.sem_perm.mode, &usp32->sem_perm.mode);
err2 |= __put_user(s.sem_perm.seq, &usp32->sem_perm.seq);
err2 |= __put_user(s.sem_otime, &usp32->sem_otime);
err2 |= __put_user(s.sem_ctime, &usp32->sem_ctime);
err2 |= __put_user(s.sem_nsems, &usp32->sem_nsems);
}
if (err2)
err = -EFAULT;
if (!err)
err = put_semid((void *)A(pad), &s, version);
break;
default:
err = -EINVAL;
......@@ -343,6 +353,7 @@ do_sys32_msgrcv (int first, int second, int msgtyp, int third, int version, void
return err;
}
static int
msgctl32 (int first, int second, void *uptr)
{
......@@ -387,7 +398,6 @@ msgctl32 (int first, int second, void *uptr)
set_fs(KERNEL_DS);
err = sys_msgctl(first, second|IPC_64, (void *) &m64);
set_fs(old_fs);
if (version == IPC_64) {
if (!access_ok(VERIFY_WRITE, up64, sizeof(*up64))) {
err = -EFAULT;
......@@ -608,7 +618,9 @@ shmctl32 (int first, int second, void *uptr)
if (err2)
err = -EFAULT;
break;
default:
err = -EINVAL;
break;
}
return err;
}
......
......@@ -8,7 +8,7 @@
* This allows access to 64bit processes too; but there is no way to see the extended
* register contents.
*
* $Id: ptrace32.c,v 1.12 2002/03/24 13:02:02 ak Exp $
* $Id: ptrace32.c,v 1.16 2003/03/14 16:06:35 ak Exp $
*/
#include <linux/kernel.h>
......@@ -22,11 +22,9 @@
#include <asm/errno.h>
#include <asm/debugreg.h>
#include <asm/i387.h>
#include <asm/desc.h>
#include <asm/ldt.h>
#include <asm/fpu32.h>
#include <linux/mm.h>
#include <linux/ptrace.h>
#include <linux/mm.h>
#define R32(l,q) \
case offsetof(struct user32, regs.l): stack[offsetof(struct pt_regs, q)/8] = val; break
......@@ -39,29 +37,26 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
switch (regno) {
case offsetof(struct user32, regs.fs):
if (val && (val & 3) != 3) return -EIO;
child->thread.fs = val;
child->thread.fs = val & 0xffff;
break;
case offsetof(struct user32, regs.gs):
if (val && (val & 3) != 3) return -EIO;
child->thread.gs = val;
child->thread.gs = val & 0xffff;
break;
case offsetof(struct user32, regs.ds):
if (val && (val & 3) != 3) return -EIO;
child->thread.ds = val;
child->thread.ds = val & 0xffff;
break;
case offsetof(struct user32, regs.es):
if (val && (val & 3) != 3) return -EIO;
child->thread.es = val;
child->thread.es = val & 0xffff;
break;
case offsetof(struct user32, regs.ss):
if ((val & 3) != 3) return -EIO;
stack[offsetof(struct pt_regs, ss)/8] = val;
stack[offsetof(struct pt_regs, ss)/8] = val & 0xffff;
break;
case offsetof(struct user32, regs.cs):
if ((val & 3) != 3) return -EIO;
stack[offsetof(struct pt_regs, cs)/8] = val;
stack[offsetof(struct pt_regs, cs)/8] = val & 0xffff;
break;
R32(ebx, rbx);
......@@ -79,8 +74,16 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val)
stack[offsetof(struct pt_regs, eflags)/8] = val & 0x44dd5;
break;
case offsetof(struct user32, u_debugreg[0]) ... offsetof(struct user32, u_debugreg[6]):
child->thread.debugreg[(regno-offsetof(struct user32, u_debugreg[0]))/4] = val;
case offsetof(struct user32, u_debugreg[4]):
case offsetof(struct user32, u_debugreg[5]):
return -EIO;
case offsetof(struct user32, u_debugreg[0]) ...
offsetof(struct user32, u_debugreg[3]):
case offsetof(struct user32, u_debugreg[6]):
child->thread.debugreg
[(regno-offsetof(struct user32, u_debugreg[0]))/4]
= val;
break;
case offsetof(struct user32, u_debugreg[7]):
......@@ -170,11 +173,19 @@ static struct task_struct *find_target(int request, int pid, int *err)
if (child)
get_task_struct(child);
read_unlock(&tasklist_lock);
*err = ptrace_check_attach(child,0);
if (*err == 0)
if (child) {
*err = -EPERM;
if (child->pid == 1)
goto out;
*err = ptrace_check_attach(child, request == PTRACE_KILL);
if (*err < 0)
goto out;
return child;
}
out:
put_task_struct(child);
return NULL;
}
extern asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, unsigned long data);
......@@ -187,6 +198,9 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
__u32 val;
switch (request) {
default:
return sys_ptrace(request, pid, addr, data);
case PTRACE_PEEKTEXT:
case PTRACE_PEEKDATA:
case PTRACE_POKEDATA:
......@@ -201,9 +215,6 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
case PTRACE_GETFPXREGS:
break;
default:
ret = sys_ptrace(request, pid, addr, data);
return ret;
}
child = find_target(request, pid, &ret);
......@@ -261,7 +272,6 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
ret = -EIO;
break;
}
empty_fpu(child);
ret = 0;
for ( i = 0; i <= 16*4; i += sizeof(u32) ) {
ret |= __get_user(tmp, (u32 *) (unsigned long) data);
......@@ -271,33 +281,47 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
break;
}
case PTRACE_SETFPREGS:
empty_fpu(child);
case PTRACE_GETFPREGS:
ret = -EIO;
if (!access_ok(VERIFY_READ, (void *)(u64)data,
sizeof(struct user_i387_struct)))
break;
save_i387_ia32(child, (void *)(u64)data, childregs, 1);
ret = 0;
break;
case PTRACE_GETFPREGS:
empty_fpu(child);
restore_i387_ia32(child, (void *)(u64)data, 1);
case PTRACE_SETFPREGS:
ret = -EIO;
if (!access_ok(VERIFY_WRITE, (void *)(u64)data,
sizeof(struct user_i387_struct)))
break;
ret = 0;
/* don't check for EFAULT, to stay bug-to-bug compatible with i386 */
restore_i387_ia32(child, (void *)(u64)data, 1);
break;
case PTRACE_GETFPXREGS: {
struct user32_fxsr_struct *u = (void *)(u64)data;
empty_fpu(child);
ret = copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u));
ret |= __put_user(childregs->cs, &u->fcs);
ret |= __put_user(child->thread.ds, &u->fos);
if (ret)
init_fpu(child);
ret = -EIO;
if (!access_ok(VERIFY_WRITE, u, sizeof(*u)))
break;
ret = -EFAULT;
if (__copy_to_user(u, &child->thread.i387.fxsave, sizeof(*u)))
break;
ret = __put_user(childregs->cs, &u->fcs);
ret |= __put_user(child->thread.ds, &u->fos);
break;
}
case PTRACE_SETFPXREGS: {
struct user32_fxsr_struct *u = (void *)(u64)data;
empty_fpu(child);
/* no error checking to be bug to bug compatible with i386 */
copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u));
unlazy_fpu(child);
ret = -EIO;
if (!access_ok(VERIFY_READ, u, sizeof(*u)))
break;
/* no checking to be bug-to-bug compatible with i386 */
__copy_from_user(&child->thread.i387.fxsave, u, sizeof(*u));
child->used_math = 1;
child->thread.i387.fxsave.mxcsr &= 0xffbf;
ret = 0;
break;
......
......@@ -57,6 +57,7 @@
#include <linux/binfmts.h>
#include <linux/init.h>
#include <linux/aio_abi.h>
#include <linux/aio.h>
#include <linux/compat.h>
#include <linux/vfs.h>
#include <linux/ptrace.h>
......@@ -74,6 +75,7 @@
#define A(__x) ((unsigned long)(__x))
#define AA(__x) ((unsigned long)(__x))
#define u32_to_ptr(x) ((void *)(u64)(x))
#define ROUND_UP(x,a) ((__typeof__(x))(((unsigned long)(x) + ((a) - 1)) & ~((a) - 1)))
#define NAME_OFFSET(de) ((int) ((de)->d_name - (char *) (de)))
......@@ -738,7 +740,7 @@ asmlinkage ssize_t sys_readv(unsigned long,const struct iovec *,unsigned long);
asmlinkage ssize_t sys_writev(unsigned long,const struct iovec *,unsigned long);
static struct iovec *
get_compat_iovec(struct compat_iovec *iov32, struct iovec *iov_buf, u32 count, int type, int *errp)
get_compat_iovec(struct compat_iovec *iov32, struct iovec *iov_buf, u32 *count, int type, int *errp)
{
int i;
u32 buf, len;
......@@ -747,15 +749,18 @@ get_compat_iovec(struct compat_iovec *iov32, struct iovec *iov_buf, u32 count, i
/* Get the "struct iovec" from user memory */
if (!count)
*errp = 0;
if (!*count)
return 0;
if (count > UIO_MAXIOV)
*errp = -EINVAL;
if (*count > UIO_MAXIOV)
return(struct iovec *)0;
if(verify_area(VERIFY_READ, iov32, sizeof(struct compat_iovec)*count))
*errp = -EFAULT;
if(verify_area(VERIFY_READ, iov32, sizeof(struct compat_iovec)*(*count)))
return(struct iovec *)0;
if (count > UIO_FASTIOV) {
if (*count > UIO_FASTIOV) {
*errp = -ENOMEM;
iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL);
iov = kmalloc(*count*sizeof(struct iovec), GFP_KERNEL);
if (!iov)
return((struct iovec *)0);
} else
......@@ -763,14 +768,19 @@ get_compat_iovec(struct compat_iovec *iov32, struct iovec *iov_buf, u32 count, i
ivp = iov;
totlen = 0;
for (i = 0; i < count; i++) {
for (i = 0; i < *count; i++) {
*errp = __get_user(len, &iov32->iov_len) |
__get_user(buf, &iov32->iov_base);
if (*errp)
goto error;
*errp = verify_area(type, (void *)A(buf), len);
if (*errp)
if (*errp) {
if (i > 0) {
*count = i;
break;
}
goto error;
}
/* SuS checks: */
*errp = -EINVAL;
if ((int)len < 0)
......@@ -799,7 +809,7 @@ sys32_readv(int fd, struct compat_iovec *vector, u32 count)
int ret;
mm_segment_t old_fs = get_fs();
if ((iov = get_compat_iovec(vector, iovstack, count, VERIFY_WRITE, &ret)) == NULL)
if ((iov = get_compat_iovec(vector, iovstack, &count, VERIFY_WRITE, &ret)) == NULL)
return ret;
set_fs(KERNEL_DS);
ret = sys_readv(fd, iov, count);
......@@ -817,7 +827,7 @@ sys32_writev(int fd, struct compat_iovec *vector, u32 count)
int ret;
mm_segment_t old_fs = get_fs();
if ((iov = get_compat_iovec(vector, iovstack, count, VERIFY_READ, &ret)) == NULL)
if ((iov = get_compat_iovec(vector, iovstack, &count, VERIFY_READ, &ret)) == NULL)
return ret;
set_fs(KERNEL_DS);
ret = sys_writev(fd, iov, count);
......@@ -1672,21 +1682,26 @@ static int nargs(u32 src, char **dst)
return cnt;
}
long sys32_execve(char *name, u32 argv, u32 envp, struct pt_regs regs)
asmlinkage long sys32_execve(char *name, u32 argv, u32 envp, struct pt_regs regs)
{
mm_segment_t oldseg;
char **buf;
int na,ne;
char **buf = NULL;
int na = 0,ne = 0;
int ret;
unsigned sz;
unsigned sz = 0;
if (argv) {
na = nargs(argv, NULL);
if (na < 0)
return -EFAULT;
}
if (envp) {
ne = nargs(envp, NULL);
if (ne < 0)
return -EFAULT;
}
if (argv || envp) {
sz = (na+ne)*sizeof(void *);
if (sz > PAGE_SIZE)
buf = vmalloc(sz);
......@@ -1694,14 +1709,19 @@ long sys32_execve(char *name, u32 argv, u32 envp, struct pt_regs regs)
buf = kmalloc(sz, GFP_KERNEL);
if (!buf)
return -ENOMEM;
}
if (argv) {
ret = nargs(argv, buf);
if (ret < 0)
goto free;
}
if (envp) {
ret = nargs(envp, buf + na);
if (ret < 0)
goto free;
}
name = getname(name);
ret = PTR_ERR(name);
......@@ -1710,7 +1730,7 @@ long sys32_execve(char *name, u32 argv, u32 envp, struct pt_regs regs)
oldseg = get_fs();
set_fs(KERNEL_DS);
ret = do_execve(name, buf, buf+na, &regs);
ret = do_execve(name, argv ? buf : NULL, envp ? buf+na : NULL, &regs);
set_fs(oldseg);
if (ret == 0)
......@@ -1719,10 +1739,12 @@ long sys32_execve(char *name, u32 argv, u32 envp, struct pt_regs regs)
putname(name);
free:
if (argv || envp) {
if (sz > PAGE_SIZE)
vfree(buf);
else
kfree(buf);
}
return ret;
}
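nargs() above both counts the arguments and, on the second pass, widens the
32-bit user pointers into the kernel's 64-bit char * array. A hypothetical
sketch of just the widening step (helper name invented, error handling
trimmed):

    /* hypothetical helper: widen a NULL-terminated array of 32-bit
       user pointers into 64-bit kernel pointers */
    static int widen_ptrs(u32 uarr, char **dst, int max)
    {
            int i;
            for (i = 0; i < max; i++) {
                    u32 p;
                    if (get_user(p, (u32 *)(u64)uarr + i))
                            return -EFAULT;
                    dst[i] = (char *)(u64)p;  /* zero-extend */
                    if (!p)
                            return i;         /* terminator found */
            }
            return -E2BIG;
    }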
......@@ -2012,12 +2034,8 @@ long asmlinkage sys32_nfsservctl(int cmd, void *notused, void *notused2)
long sys32_module_warning(void)
{
static long warn_time = -(60*HZ);
if (time_before(warn_time + 60*HZ,jiffies) && strcmp(current->comm,"klogd")) {
printk(KERN_INFO "%s: 32bit 2.4.x modutils not supported on 64bit kernel\n",
current->comm);
warn_time = jiffies;
}
return -ENOSYS ;
}
......@@ -2055,6 +2073,7 @@ long sys32_sched_getaffinity(pid_t pid, unsigned int len,
return err;
}
extern long sys_io_setup(unsigned nr_reqs, aio_context_t *ctx);
long sys32_io_setup(unsigned nr_reqs, u32 *ctx32p)
......@@ -2071,48 +2090,47 @@ long sys32_io_setup(unsigned nr_reqs, u32 *ctx32p)
return ret;
}
extern asmlinkage long sys_io_submit(aio_context_t ctx_id, long nr,
struct iocb **iocbpp);
long sys32_io_submit(aio_context_t ctx_id, unsigned long nr,
asmlinkage long sys32_io_submit(aio_context_t ctx_id, int nr,
u32 *iocbpp)
{
mm_segment_t oldfs = get_fs();
int k, err = 0;
struct iocb **iocb64;
if (nr > 128)
struct kioctx *ctx;
long ret = 0;
int i;
if (unlikely(nr < 0))
return -EINVAL;
if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
return -EFAULT;
ctx = lookup_ioctx(ctx_id);
if (unlikely(!ctx)) {
pr_debug("EINVAL: io_submit: invalid context id\n");
return -EINVAL;
iocb64 = kmalloc(sizeof(struct iocb *) * nr, GFP_KERNEL);
if (!iocb64)
return -ENOMEM;
for (k = 0; k < nr && !err; k++) {
u64 val1, val2;
u32 iocb32;
struct iocb *iocb;
err = get_user(iocb32, (u32 *)(u64)iocbpp[k]);
iocb64[k] = iocb = (void *)(u64)iocb32;
if (get_user(val1, &iocb->aio_buf) ||
get_user(val2, &iocb->aio_nbytes))
err = -EFAULT;
else if (!val1) /* should check cmd */
;
else if (verify_area(VERIFY_WRITE, (void*)val1, val2))
err = -EFAULT;
/* paranoia check - remove it when you are sure they
are not pointers */
if (get_user(val1, &iocb->aio_reserved2) || val1 ||
get_user(val2, &iocb->aio_reserved2) || val2)
err = -EFAULT;
}
if (!err) {
set_fs(KERNEL_DS);
err = sys_io_submit(ctx_id, nr, iocb64);
set_fs(oldfs);
for (i=0; i<nr; i++) {
u32 p32;
struct iocb *user_iocb, tmp;
if (unlikely(__get_user(p32, iocbpp + i))) {
ret = -EFAULT;
break;
}
kfree(iocb64);
return err;
user_iocb = u32_to_ptr(p32);
if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
ret = -EFAULT;
break;
}
ret = io_submit_one(ctx, user_iocb, &tmp);
if (ret)
break;
}
put_ioctx(ctx);
return i ? i : ret;
}
extern asmlinkage long sys_io_getevents(aio_context_t ctx_id,
......@@ -2140,7 +2158,7 @@ asmlinkage long sys32_io_getevents(aio_context_t ctx_id,
set_fs(KERNEL_DS);
ret = sys_io_getevents(ctx_id,min_nr,nr,events,timeout ? &t : NULL);
set_fs(oldfs);
if (timeout && put_compat_timespec(&t, timeout))
if (!ret && timeout && put_compat_timespec(&t, timeout))
return -EFAULT;
return ret;
}
......@@ -2172,12 +2190,8 @@ asmlinkage long sys32_open(const char * filename, int flags, int mode)
long sys32_vm86_warning(void)
{
static long warn_time = -(60*HZ);
if (time_before(warn_time + 60*HZ,jiffies)) {
printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
current->comm);
warn_time = jiffies;
}
return -ENOSYS ;
}
......
......@@ -10,14 +10,13 @@ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
setup64.o bluesmoke.o bootflag.o e820.o reboot.o
obj-$(CONFIG_MTRR) += mtrr/
obj-$(CONFIG_ACPI) += acpi/
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o suspend_asm.o
obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_ACPI_SLEEP) += wakeup.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o
......
......@@ -105,7 +105,8 @@ void __init iommu_hole_init(void)
if (!fix && !fallback_aper_force)
return;
printk("Your BIOS is broken and doesn't leave a aperture memory hole\n");
printk("Your BIOS doesn't leave a aperture memory hole\n");
printk("Please enable the IOMMU option in the BIOS setup\n");
aper_alloc = allocate_aperture();
if (!aper_alloc)
return;
......
......@@ -408,7 +408,7 @@ void __init setup_local_APIC (void)
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
apic_write(APIC_ESR, 0);
value = apic_read(APIC_ESR);
printk("ESR value before enabling vector: %08x\n", value);
Dprintk("ESR value before enabling vector: %08x\n", value);
value = ERROR_APIC_VECTOR; // enables sending errors
apic_write_around(APIC_LVTERR, value);
......@@ -418,7 +418,7 @@ void __init setup_local_APIC (void)
if (maxlvt > 3)
apic_write(APIC_ESR, 0);
value = apic_read(APIC_ESR);
printk("ESR value after enabling vector: %08x\n", value);
Dprintk("ESR value after enabling vector: %08x\n", value);
} else {
if (esr_disable)
/*
......@@ -1080,9 +1080,10 @@ int __init APIC_init_uniprocessor (void)
if (nmi_watchdog == NMI_LOCAL_APIC)
check_nmi_watchdog();
#ifdef CONFIG_X86_IO_APIC
if (smp_found_config)
if (!skip_ioapic_setup && nr_ioapics)
if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
setup_IO_APIC();
else
nr_ioapics = 0;
#endif
setup_boot_APIC_clock();
......
/*
* arch/x86_64/kernel/bluesmoke.c - x86-64 Machine Check Exception Reporting
*
* RED-PEN: need to add power management to restore after S3 wakeup.
* Machine check handler.
* K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
* Rest from unknown author(s).
*/
#include <linux/config.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/smp.h>
#include <linux/config.h>
#include <linux/irq.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/ctype.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/kdebug.h>
#include <linux/pci.h>
#include <linux/timer.h>
#ifdef CONFIG_X86_MCE
static int mce_disabled __initdata;
static unsigned long mce_cpus;
static int mce_disabled __initdata = 0;
/*
* Machine Check Handler For PII/PIII/K7
*/
static int banks;
static unsigned long ignored_banks, disabled_banks;
/* Machine Check on everything dubious. This is a good setting
for device driver testing. */
#define K8_DRIVER_DEBUG ((1<<13)-1)
/* Report RAM errors and HyperTransport problems, but ignore device
aborts and GART errors. */
#define K8_NORMAL_OP 0xff
/*
* Machine Check Handler For Hammer
*/
#ifdef CONFIG_MCE_DEBUG
static u32 k8_nb_flags __initdata = K8_DRIVER_DEBUG;
#else
static u32 k8_nb_flags __initdata = K8_NORMAL_OP;
#endif
static void hammer_machine_check(struct pt_regs * regs, long error_code)
static void generic_machine_check(struct pt_regs * regs, long error_code)
{
int recover=1;
u32 alow, ahigh, high, low;
u32 mcgstl, mcgsth;
int i;
preempt_disable();
rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
if(mcgstl&(1<<0)) /* Recoverable ? */
recover=0;
printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", smp_processor_id(), mcgsth, mcgstl);
preempt_disable();
for (i=0;i<banks;i++) {
if (regs && (mcgstl & 2))
printk(KERN_EMERG "RIP <%02lx>:%016lx RSP %016lx\n",
regs->cs, regs->rip, regs->rsp);
for(i=0;i<banks;i++)
{
if ((1UL<<i) & ignored_banks)
continue;
rdmsr(MSR_IA32_MC0_STATUS+i*4,low, high);
if(high&(1<<31)) {
if(high&(1<<31))
{
if(high&(1<<29))
recover|=1;
if(high&(1<<25))
recover|=2;
printk(KERN_EMERG "Bank %d: %08x%08x", i, high, low);
high&=~(1<<31);
if(high&(1<<27)) {
if(high&(1<<27))
{
rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
printk("[%08x%08x]", ahigh, alow);
printk("[%08x%08x]", alow, ahigh);
}
if(high&(1<<26)) {
if(high&(1<<26))
{
rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
printk(" at %08x%08x", ahigh, alow);
printk(" at %08x%08x",
ahigh, alow);
}
printk("\n");
/* Clear it */
......@@ -68,7 +90,6 @@ static void hammer_machine_check(struct pt_regs * regs, long error_code)
wmb();
}
}
preempt_enable();
if(recover&2)
panic("CPU context corrupt");
......@@ -77,16 +98,13 @@ static void hammer_machine_check(struct pt_regs * regs, long error_code)
printk(KERN_EMERG "Attempting to continue.\n");
mcgstl&=~(1<<2);
wrmsr(MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
}
/*
* Handle unconfigured int18 (should never happen)
*/
preempt_enable();
}
static void unexpected_machine_check(struct pt_regs * regs, long error_code)
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
{
printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
printk("unexpected machine check %lx\n", error_code);
}
/*
......@@ -95,56 +113,194 @@ static void unexpected_machine_check(struct pt_regs * regs, long error_code)
static void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
asmlinkage void do_machine_check(struct pt_regs * regs, long error_code)
void do_machine_check(struct pt_regs * regs, long error_code)
{
notify_die(DIE_NMI, "machine check", regs, error_code, 255, SIGKILL);
machine_check_vector(regs, error_code);
}
/*
* K8 machine check.
*/
#ifdef CONFIG_X86_MCE_NONFATAL
static struct timer_list mce_timer;
static int timerset = 0;
#define MCE_RATE 15*HZ /* timer rate is 15s */
static struct pci_dev *find_k8_nb(void)
{
struct pci_dev *dev;
int cpu = smp_processor_id();
pci_for_each_dev(dev) {
if (dev->bus->number==0 && PCI_FUNC(dev->devfn)==3 &&
PCI_SLOT(dev->devfn) == (24+cpu))
return dev;
}
return NULL;
}
static void mce_checkregs (void *info)
static void check_k8_nb(void)
{
u32 low, high;
int i;
struct pci_dev *nb;
nb = find_k8_nb();
if (nb == NULL)
return;
for (i=0; i<banks; i++) {
rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
u32 statuslow, statushigh;
pci_read_config_dword(nb, 0x48, &statuslow);
pci_read_config_dword(nb, 0x4c, &statushigh);
if (!(statushigh & (1<<31)))
return;
printk(KERN_ERR "Northbridge status %08x%08x\n",
statushigh,statuslow);
if (statuslow & 0x10)
printk(KERN_ERR "GART error %d\n", statuslow & 0xf);
if (statushigh & (1<<31))
printk(KERN_ERR "Lost an northbridge error\n");
if (statushigh & (1<<25))
printk(KERN_EMERG "NB status: unrecoverable\n");
if (statushigh & (1<<26)) {
u32 addrhigh, addrlow;
pci_read_config_dword(nb, 0x54, &addrhigh);
pci_read_config_dword(nb, 0x50, &addrlow);
printk(KERN_ERR "NB error address %08x%08x\n", addrhigh,addrlow);
}
if (statushigh & (1<<29))
printk(KERN_EMERG "Error uncorrected\n");
statushigh &= ~(1<<31);
pci_write_config_dword(nb, 0x4c, statushigh);
}
if ((low | high) != 0) {
printk (KERN_EMERG "MCE: The hardware reports a non fatal, correctable incident occurred on CPU %d.\n", smp_processor_id());
printk (KERN_EMERG "Bank %d: %08x%08x\n", i, high, low);
static void k8_machine_check(struct pt_regs * regs, long error_code)
{
u64 status, nbstatus;
/* Scrub the error so we don't pick it up in MCE_RATE seconds time. */
wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
preempt_disable();
/* Serialize */
wmb();
rdmsrl(MSR_IA32_MCG_STATUS, status);
if ((status & (1<<2)) == 0) {
if (!regs)
check_k8_nb();
return;
}
if (status & 1)
printk(KERN_EMERG "MCG_STATUS: unrecoverable\n");
rdmsrl(MSR_IA32_MC0_STATUS+4*4, nbstatus);
if ((nbstatus & (1UL<<63)) == 0)
goto others;
printk(KERN_EMERG "Northbridge Machine Check %s %016lx %lx\n",
regs ? "exception" : "timer",
(unsigned long)nbstatus, error_code);
if (nbstatus & (1UL<<62))
printk(KERN_EMERG "Lost at least one NB error condition\n");
if (nbstatus & (1UL<<61))
printk(KERN_EMERG "Uncorrectable condition\n");
if (nbstatus & (1UL<57))
printk(KERN_EMERG "Unrecoverable condition\n");
check_k8_nb();
if (nbstatus & (1UL<<58)) {
u64 adr;
rdmsrl(MSR_IA32_MC0_ADDR+4*4, adr);
printk(KERN_EMERG "Address: %016lx\n", (unsigned long)adr);
}
wrmsrl(MSR_IA32_MC0_STATUS+4*4, 0);
wrmsrl(MSR_IA32_MCG_STATUS, 0);
if (regs && (status & (1<<1)))
printk(KERN_EMERG "MCE at EIP %lx ESP %lx\n", regs->rip, regs->rsp);
others:
generic_machine_check(regs, error_code);
preempt_enable();
}
static struct timer_list mcheck_timer;
int mcheck_interval = 30*HZ;
#ifndef CONFIG_SMP
static void mcheck_timer_handler(unsigned long data)
{
k8_machine_check(NULL,0);
mcheck_timer.expires = jiffies + mcheck_interval;
add_timer(&mcheck_timer);
}
#else
/* SMP needs a process context trampoline because smp_call_function cannot be
called from interrupt context. */
static void mcheck_timer_other(void *data)
{
k8_machine_check(NULL, 0);
}
static void mce_timerfunc (unsigned long data)
static void mcheck_timer_dist(void *data)
{
on_each_cpu (mce_checkregs, NULL, 1, 1);
smp_call_function(mcheck_timer_other,0,0,0);
k8_machine_check(NULL, 0);
mcheck_timer.expires = jiffies + mcheck_interval;
add_timer(&mcheck_timer);
}
/* Refresh the timer. */
mce_timer.expires = jiffies + MCE_RATE;
add_timer (&mce_timer);
static void mcheck_timer_handler(unsigned long data)
{
static DECLARE_WORK(mcheck_work, mcheck_timer_dist, NULL);
schedule_work(&mcheck_work);
}
#endif
static int nok8 __initdata;
static void __init k8_mcheck_init(struct cpuinfo_x86 *c)
{
u64 cap;
int i;
struct pci_dev *nb;
if (!test_bit(X86_FEATURE_MCE, &c->x86_capability) ||
!test_bit(X86_FEATURE_MCA, &c->x86_capability))
return;
rdmsrl(MSR_IA32_MCG_CAP, cap);
banks = cap&0xff;
machine_check_vector = k8_machine_check;
for (i = 0; i < banks; i++) {
u64 val = ((1UL<<i) & disabled_banks) ? 0 : ~0UL;
wrmsrl(MSR_IA32_MC0_CTL+4*i, val);
wrmsrl(MSR_IA32_MC0_STATUS+4*i,0);
}
nb = find_k8_nb();
if (nb != NULL) {
u32 reg, reg2;
pci_read_config_dword(nb, 0x40, &reg);
pci_write_config_dword(nb, 0x40, k8_nb_flags);
pci_read_config_dword(nb, 0x44, &reg2);
pci_write_config_dword(nb, 0x44, reg2);
printk(KERN_INFO "Machine Check for K8 Northbridge %d enabled (%x,%x)\n",
nb->devfn, reg, reg2);
ignored_banks |= (1UL<<4);
}
set_in_cr4(X86_CR4_MCE);
if (mcheck_interval && (smp_processor_id() == 0)) {
init_timer(&mcheck_timer);
mcheck_timer.function = (void (*)(unsigned long))mcheck_timer_handler;
mcheck_timer.expires = jiffies + mcheck_interval;
add_timer(&mcheck_timer);
}
printk(KERN_INFO "Machine Check Reporting enabled for CPU#%d\n", smp_processor_id());
}
/*
* Set up machine check reporting for processors with Intel style MCE
* Set up machine check reporting for Intel processors
*/
static void __init hammer_mcheck_init(struct cpuinfo_x86 *c)
static void __init generic_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
......@@ -154,33 +310,36 @@ static void __init hammer_mcheck_init(struct cpuinfo_x86 *c)
* Check for MCE support
*/
if( !test_bit(X86_FEATURE_MCE, c->x86_capability) )
if( !test_bit(X86_FEATURE_MCE, &c->x86_capability) )
return;
/* Check for PPro style MCA */
if( !test_bit(X86_FEATURE_MCA, c->x86_capability) )
/*
* Check for PPro style MCA
*/
if( !test_bit(X86_FEATURE_MCA, &c->x86_capability) )
return;
/* Ok machine check is available */
machine_check_vector = hammer_machine_check;
machine_check_vector = generic_machine_check;
wmb();
if(done==0)
printk(KERN_INFO "Machine check architecture supported.\n");
printk(KERN_INFO "Intel machine check architecture supported.\n");
rdmsr(MSR_IA32_MCG_CAP, l, h);
if(l&(1<<8)) /* Control register present ? */
if(l&(1<<8))
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
banks = l&0xff;
for(i=0; i<banks; i++)
wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
for(i=0; i<banks; i++)
for(i=0;i<banks;i++)
{
u32 val = ((1UL<<i) & disabled_banks) ? 0 : ~0;
wrmsr(MSR_IA32_MC0_CTL+4*i, val, val);
wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
}
set_in_cr4(X86_CR4_MCE);
printk(KERN_INFO "Machine check reporting enabled on CPU#%d.\n", smp_processor_id());
printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", smp_processor_id());
done=1;
}
......@@ -190,30 +349,22 @@ static void __init hammer_mcheck_init(struct cpuinfo_x86 *c)
void __init mcheck_init(struct cpuinfo_x86 *c)
{
if (test_and_set_bit(smp_processor_id(), &mce_cpus))
return;
if(mce_disabled==1)
return;
switch(c->x86_vendor)
{
switch(c->x86_vendor) {
case X86_VENDOR_AMD:
hammer_mcheck_init(c);
#ifdef CONFIG_X86_MCE_NONFATAL
if (timerset == 0) {
/* Set the timer to check for non-fatal
errors every MCE_RATE seconds */
init_timer (&mce_timer);
mce_timer.expires = jiffies + MCE_RATE;
mce_timer.data = 0;
mce_timer.function = &mce_timerfunc;
add_timer (&mce_timer);
timerset = 1;
printk(KERN_INFO "Machine check exception polling timer started.\n");
}
#endif
if (c->x86 == 15 && !nok8) {
k8_mcheck_init(c);
break;
}
/* FALL THROUGH */
default:
case X86_VENDOR_INTEL:
generic_mcheck_init(c);
break;
}
}
......@@ -224,16 +375,33 @@ static int __init mcheck_disable(char *str)
return 0;
}
/* mce=off disable machine check
mce=nok8 disable k8 specific features
mce=disable<NUMBER> disable bank NUMBER
mce=enable<NUMBER> enable bank NUMBER
mce=device enable device driver test reporting in the NB
mce=NUMBER set the mcheck timer interval to NUMBER seconds.
Options can also be comma separated in a single mce= */
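/* An illustrative combination of the options above (hypothetical):
   mce=disable4,10 ignores bank 4 and polls every 10 seconds. */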
static int __init mcheck_enable(char *str)
{
mce_disabled = -1;
char *p;
while ((p = strsep(&str,",")) != NULL) {
if (isdigit(*p))
mcheck_interval = simple_strtol(p,NULL,0) * HZ;
else if (!strcmp(p,"off"))
mce_disabled = 1;
else if (!strncmp(p,"enable",6))
disabled_banks &= ~(1<<simple_strtol(p+6,NULL,0));
else if (!strncmp(p,"disable",7))
disabled_banks |= (1<<simple_strtol(p+7,NULL,0));
else if (!strcmp(p,"nok8"))
nok8 = 1;
else if (!strcmp(p,"device"))
k8_nb_flags = K8_DRIVER_DEBUG;
}
return 0;
}
__setup("nomce", mcheck_disable);
__setup("mce", mcheck_enable);
#else
asmlinkage void do_machine_check(struct pt_regs * regs, long error_code) {}
void __init mcheck_init(struct cpuinfo_x86 *c) {}
#endif
......@@ -359,8 +359,13 @@ ENTRY(stub_rt_sigreturn)
/* 0(%rsp): interrupt number */
.macro interrupt func
cld
#ifdef CONFIG_X86_REMOTE_DEBUG
SAVE_ALL
movq %rsp,%rdi
#else
SAVE_ARGS
leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
#endif
testl $3,CS(%rdi)
je 1f
swapgs
......@@ -667,18 +672,6 @@ ENTRY(execve)
ret
ENTRY(page_fault)
#ifdef CONFIG_KDB
pushq %rcx
pushq %rdx
pushq %rax
movl $473,%ecx
rdmsr
andl $0xfffffffe,%eax /* Disable last branch recording */
wrmsr
popq %rax
popq %rdx
popq %rcx
#endif
errorentry do_page_fault
ENTRY(coprocessor_error)
......
......@@ -194,7 +194,7 @@ ENTRY(no_long_mode)
jmp 1b
.org 0xf00
pGDT32:
ENTRY(pGDT32):
.word gdt32_end-gdt_table32
.long gdt_table32-__START_KERNEL_map
......@@ -307,6 +307,15 @@ ENTRY(level3_physmem_pgt)
.quad 0x0000000000105007 /* -> level2_kernel_pgt (so that __va works even before pagetable_init) */
.org 0xb000
ENTRY(wakeup_level4_pgt)
.quad 0x0000000000102007 /* -> level3_ident_pgt */
.fill 255,8,0
.quad 0x000000000010a007
.fill 254,8,0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
.quad 0x0000000000103007 /* -> level3_kernel_pgt */
.org 0xc000
.data
.align 16
......@@ -371,3 +380,4 @@ ENTRY(idt_table)
.quad 0
.quad 0
.endr
......@@ -15,6 +15,7 @@
#include <asm/processor.h>
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/bootsetup.h>
/* Don't add a printk in there. printk relies on the PDA which is not initialized
yet. */
......@@ -51,7 +52,7 @@ static void __init copy_bootdata(char *real_mode_data)
printk("old bootloader convention, maybe loadlin?\n");
}
command_line = (char *) ((u64)(new_data));
memcpy(saved_command_line, command_line, 2048);
memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
printk("Bootdata ok (command line is %s)\n", saved_command_line);
}
......
......@@ -42,7 +42,7 @@ void __init fpu_init(void)
/* clean state in init */
stts();
clear_thread_flag(TIF_USEDFPU);
current_thread_info()->status = 0;
current->used_math = 0;
}
......@@ -51,13 +51,12 @@ void __init fpu_init(void)
* so initialize it and set the mxcsr to its default.
* remember the current task has used the FPU.
*/
void init_fpu(void)
void init_fpu(struct task_struct *child)
{
struct task_struct *me = current;
memset(&me->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
me->thread.i387.fxsave.cwd = 0x37f;
me->thread.i387.fxsave.mxcsr = 0x1f80;
me->used_math = 1;
memset(&child->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct));
child->thread.i387.fxsave.cwd = 0x37f;
child->thread.i387.fxsave.mxcsr = 0x1f80;
child->used_math = 1;
}
/*
......@@ -81,7 +80,7 @@ int save_i387(struct _fpstate *buf)
if (!tsk->used_math)
return 0;
tsk->used_math = 0; /* trigger finit */
if (test_thread_flag(TIF_USEDFPU)) {
if (tsk->thread_info->status & TS_USEDFPU) {
err = save_i387_checking((struct i387_fxsave_struct *)buf);
if (err) return err;
stts();
......@@ -99,7 +98,7 @@ int save_i387(struct _fpstate *buf)
int get_fpregs(struct user_i387_struct *buf, struct task_struct *tsk)
{
empty_fpu(tsk);
init_fpu(tsk);
return __copy_to_user((void *)buf, &tsk->thread.i387.fxsave,
sizeof(struct user_i387_struct)) ? -EFAULT : 0;
}
......
......@@ -25,13 +25,15 @@
#include <asm/mpspec.h>
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/proto.h>
#include <asm/kdebug.h>
extern void default_do_nmi(struct pt_regs *);
unsigned int nmi_watchdog = NMI_LOCAL_APIC;
static unsigned int nmi_hz = HZ;
unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
extern void show_registers(struct pt_regs *regs);
int nmi_watchdog_disabled;
#define K7_EVNTSEL_ENABLE (1 << 22)
#define K7_EVNTSEL_INT (1 << 20)
......@@ -251,15 +253,13 @@ void touch_nmi_watchdog (void)
alert_counter[i] = 0;
}
void nmi_watchdog_tick (struct pt_regs * regs)
void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
{
if (nmi_watchdog_disabled)
return;
int sum, cpu = safe_smp_processor_id();
/*
* Since current_thread_info()-> is always on the stack, and we
* always switch the stack NMI-atomically, it's safe to use
* smp_processor_id().
*/
int sum, cpu = smp_processor_id();
sum = read_pda(apic_timer_irqs);
if (last_irq_sums[cpu] == sum) {
......@@ -269,6 +269,10 @@ void nmi_watchdog_tick (struct pt_regs * regs)
*/
alert_counter[cpu]++;
if (alert_counter[cpu] == 5*nmi_hz) {
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_BAD) {
alert_counter[cpu] = 0;
return;
}
spin_lock(&nmi_print_lock);
/*
* We are in trouble anyway, lets at least try
......
......@@ -8,7 +8,7 @@
* See Documentation/DMA-mapping.txt for the interface specification.
*
* Copyright 2002 Andi Kleen, SuSE Labs.
* $Id: pci-gart.c,v 1.12 2002/09/19 19:25:32 ak Exp $
* $Id: pci-gart.c,v 1.20 2003/03/12 08:23:29 ak Exp $
*/
/*
......@@ -19,9 +19,12 @@ agpgart_be
possible future tuning:
fast path for sg streaming mappings
more intelligent flush strategy - flush only a single NB?
more intelligent flush strategy - flush only a single NB? flush only when
gart area fills up and alloc_iommu wraps.
don't flush on allocation - need to unmap the gart area first to avoid prefetches
by the CPU
move boundary between IOMMU and AGP in GART dynamically
could use exact fit in the gart in alloc_consistent, not order of two.
*/
#include <linux/config.h>
......@@ -49,7 +52,11 @@ u32 *iommu_gatt_base; /* Remapping table */
int no_iommu;
static int no_agp;
#ifdef CONFIG_IOMMU_DEBUG
int force_mmu = 1;
#else
int force_mmu = 0;
#endif
extern int fallback_aper_order;
extern int fallback_aper_force;
......@@ -58,10 +65,9 @@ extern int fallback_aper_force;
static spinlock_t iommu_bitmap_lock = SPIN_LOCK_UNLOCKED;
static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
#define GPTE_MASK 0xfffffff000
#define GPTE_VALID 1
#define GPTE_COHERENT 2
#define GPTE_ENCODE(x,flag) (((x) & 0xfffffff0) | ((x) >> 28) | GPTE_VALID | (flag))
#define GPTE_ENCODE(x) (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
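/* Worked example for the macros above (hypothetical 40-bit, page-aligned
 * physical address):
 *   x = 0x1234567000
 *   x & 0xfffff000               = 0x34567000   (low address bits)
 *   ((x >> 32) << 4)             = 0x00000120   (bits 32-39 -> bits 4-11)
 *   | GPTE_VALID | GPTE_COHERENT = 0x34567123
 * GPTE_DECODE(0x34567123) recovers 0x1234567000.
 */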
#define for_all_nb(dev) \
......@@ -72,7 +78,6 @@ static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
#define EMERGENCY_PAGES 32 /* = 128KB */
#ifdef CONFIG_AGP
extern int agp_amdk8_init(void);
extern int agp_init(void);
#define AGPEXTERN extern
#else
......@@ -130,7 +135,7 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
{
void *memory;
int gfp = GFP_ATOMIC;
int order, i;
int i;
unsigned long iommu_page;
if (hwdev == NULL || hwdev->dma_mask < 0xffffffff || no_iommu)
......@@ -140,15 +145,15 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
* First try to allocate contiguous memory and use it directly if it is
* already in lowmem.
*/
order = get_order(size);
memory = (void *)__get_free_pages(gfp, order);
size = round_up(size, PAGE_SIZE);
memory = (void *)__get_free_pages(gfp, get_order(size));
if (memory == NULL) {
return NULL;
} else {
int high = (unsigned long)virt_to_bus(memory) + size
>= 0xffffffff;
int mmu = high;
if (force_mmu)
if (force_mmu && !(gfp & GFP_DMA))
mmu = 1;
if (no_iommu) {
if (high) goto error;
......@@ -161,19 +166,21 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
}
}
iommu_page = alloc_iommu(1<<order);
size >>= PAGE_SHIFT;
iommu_page = alloc_iommu(size);
if (iommu_page == -1)
goto error;
/* Fill in the GATT, allocating pages as needed. */
for (i = 0; i < 1<<order; i++) {
for (i = 0; i < size; i++) {
unsigned long phys_mem;
void *mem = memory + i*PAGE_SIZE;
if (i > 0)
atomic_inc(&virt_to_page(mem)->count);
phys_mem = virt_to_phys(mem);
BUG_ON(phys_mem & ~PTE_MASK);
iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem,GPTE_COHERENT);
BUG_ON(phys_mem & ~PHYSICAL_PAGE_MASK);
iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
}
flush_gart();
......@@ -181,7 +188,7 @@ void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
return memory;
error:
free_pages((unsigned long)memory, order);
free_pages((unsigned long)memory, get_order(size));
return NULL;
}
......@@ -193,30 +200,32 @@ void pci_free_consistent(struct pci_dev *hwdev, size_t size,
void *vaddr, dma_addr_t bus)
{
u64 pte;
int order = get_order(size);
unsigned long iommu_page;
int i;
size = round_up(size, PAGE_SIZE);
if (bus < iommu_bus_base || bus > iommu_bus_base + iommu_size) {
free_pages((unsigned long)vaddr, order);
free_pages((unsigned long)vaddr, get_order(size));
return;
}
size >>= PAGE_SHIFT;
iommu_page = (bus - iommu_bus_base) / PAGE_SIZE;
for (i = 0; i < 1<<order; i++) {
for (i = 0; i < size; i++) {
pte = iommu_gatt_base[iommu_page + i];
BUG_ON((pte & GPTE_VALID) == 0);
iommu_gatt_base[iommu_page + i] = 0;
free_page((unsigned long) __va(GPTE_DECODE(pte)));
}
flush_gart();
free_iommu(iommu_page, 1<<order);
free_iommu(iommu_page, size);
}
#ifdef CONFIG_IOMMU_LEAK
/* Debugging aid for drivers that don't free their IOMMU tables */
static void **iommu_leak_tab;
static int leak_trace;
int iommu_leak_dumppages = 20;
int iommu_leak_pages = 20;
extern unsigned long printk_address(unsigned long);
void dump_leak(void)
{
int i;
......@@ -224,10 +233,13 @@ void dump_leak(void)
if (dump || !iommu_leak_tab) return;
dump = 1;
show_stack(NULL);
printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_dumppages);
for (i = 0; i < iommu_leak_dumppages; i++)
printk("[%lu: %lx] ",
iommu_pages-i,(unsigned long) iommu_leak_tab[iommu_pages-i]);
/* Very crude. dump some from the end of the table too */
printk("Dumping %d pages from end of IOMMU:\n", iommu_leak_pages);
for (i = 0; i < iommu_leak_pages; i+=2) {
printk("%lu: ", iommu_pages-i);
printk_address((unsigned long) iommu_leak_tab[iommu_pages-i]);
printk("%c", (i+1)%2 == 0 ? '\n' : ' ');
}
printk("\n");
}
#endif
......@@ -275,7 +287,8 @@ static inline int need_iommu(struct pci_dev *dev, unsigned long addr, size_t siz
return mmu;
}
dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size,int dir)
dma_addr_t __pci_map_single(struct pci_dev *dev, void *addr, size_t size,
int dir, int flush)
{
unsigned long iommu_page;
unsigned long phys_mem, bus;
......@@ -297,13 +310,18 @@ dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size,int dir)
phys_mem &= PAGE_MASK;
for (i = 0; i < npages; i++, phys_mem += PAGE_SIZE) {
BUG_ON(phys_mem & ~PTE_MASK);
BUG_ON(phys_mem & ~PHYSICAL_PAGE_MASK);
/*
* Set coherent mapping here to avoid needing to flush
* the caches on mapping.
*/
iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem, GPTE_COHERENT);
iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
#ifdef CONFIG_IOMMU_DEBUG
/* paranoia check */
BUG_ON(GPTE_DECODE(iommu_gatt_base[iommu_page+i]) != phys_mem);
#endif
#ifdef CONFIG_IOMMU_LEAK
/* XXX need eventually caller of pci_map_sg */
......@@ -311,6 +329,7 @@ dma_addr_t pci_map_single(struct pci_dev *dev, void *addr, size_t size,int dir)
iommu_leak_tab[iommu_page + i] = __builtin_return_address(0);
#endif
}
if (flush)
flush_gart();
bus = iommu_bus_base + iommu_page*PAGE_SIZE;
......@@ -341,7 +360,7 @@ void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr,
free_iommu(iommu_page, npages);
}
EXPORT_SYMBOL(pci_map_single);
EXPORT_SYMBOL(__pci_map_single);
EXPORT_SYMBOL(pci_unmap_single);
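The rename to __pci_map_single adds a flush argument so that callers mapping many pages can defer the GART flush until all GATT entries are written; pci_map_single becomes an inline wrapper passing flush=1 (see the pci.h hunk further down). A rough sketch of the batching pattern, illustrative only and assuming code inside pci-gart.c itself, since flush_gart() is not exported:

	/* map a small array of buffers, flush the GART once at the end */
	static void map_batch(struct pci_dev *dev, void **buf, dma_addr_t *bus,
			      size_t *len, int n)
	{
		int i;
		for (i = 0; i < n; i++)
			bus[i] = __pci_map_single(dev, buf[i], len[i],
						  PCI_DMA_TODEVICE, 0);
		flush_gart();	/* one flush covers all new GATT entries */
	}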
static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
......@@ -438,8 +457,6 @@ static __init int init_k8_gatt(agp_kern_info *info)
}
flush_gart();
global_flush_tlb();
printk("PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10);
return 0;
......@@ -530,8 +547,10 @@ void __init pci_iommu_init(void)
off don't use the IOMMU
leak[=pages] turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on)
memaper[=order] allocate a private aperture over RAM of size 32MB<<order.
noforce don't force IOMMU usage. Should be fastest.
force Force IOMMU usage and turn on unmap debugging.
*/
__init int iommu_setup(char *opt, char **end)
__init int iommu_setup(char *opt)
{
int arg;
char *p = opt;
......@@ -552,17 +571,21 @@ __init int iommu_setup(char *opt, char **end)
fallback_aper_order = arg;
}
#ifdef CONFIG_IOMMU_LEAK
if (!memcmp(p,"leak", 4))
if (!memcmp(p,"leak", 4)) {
leak_trace = 1;
p += 4;
if (*p == '=') ++p;
if (isdigit(*p) && get_option(&p, &arg))
iommu_leak_pages = arg;
} else
#endif
if (isdigit(*p) && get_option(&p, &arg))
iommu_size = arg;
do {
if (*p == ' ' || *p == 0) {
*end = p;
if (*p == ' ' || *p == 0)
return 0;
}
} while (*p++ != ',');
}
return 1;
}
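Putting the documented options together, a boot line exercising the new parser could look like this (values illustrative):

	iommu=force,memaper=2,leak=16

force enables unmap debugging, memaper=2 allocates a 32MB<<2 = 128MB aperture over RAM, and leak=16 makes the CONFIG_IOMMU_LEAK dump print 16 entries.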
......@@ -192,6 +192,8 @@ void show_regs(struct pt_regs * regs)
fs,fsindex,gs,gsindex,shadowgs);
printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
show_trace(&regs->rsp);
}
extern void load_gs_index(unsigned);
......@@ -260,6 +262,14 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
(((u32)desc->base2) << 24);
}
/*
* This gets called before we allocate a new thread and copy
* the current task into it.
*/
void prepare_to_copy(struct task_struct *tsk)
{
unlazy_fpu(tsk);
}
int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
unsigned long unused,
......@@ -294,9 +304,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
asm("movl %%es,%0" : "=m" (p->thread.es));
asm("movl %%ds,%0" : "=m" (p->thread.ds));
unlazy_fpu(me);
p->thread.i387 = me->thread.i387;
if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
p->thread.io_bitmap_ptr = kmalloc((IO_BITMAP_SIZE+1)*4, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr)
......@@ -314,7 +321,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
err = ia32_child_tls(p, childregs);
else
#endif
err = do_arch_prctl(p, ARCH_SET_FS, childregs->r10);
err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
if (err)
goto out;
}
......
......@@ -240,8 +240,8 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
unsigned long tmp;
ret = -EIO;
if ((addr & 3) || addr < 0 ||
addr > sizeof(struct user) - 3)
if ((addr & 7) || addr < 0 ||
addr > sizeof(struct user) - 7)
break;
tmp = 0; /* Default return condition */
......@@ -250,7 +250,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
if(addr >= (long) &dummy->u_debugreg[0] &&
addr <= (long) &dummy->u_debugreg[7]){
addr -= (long) &dummy->u_debugreg[0];
addr = addr >> 2;
addr = addr >> 3;
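/* u_debugreg[] entries are 8 bytes on x86-64, hence the shift by 3 (was 2 on i386) */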
tmp = child->thread.debugreg[addr];
}
ret = put_user(tmp,(unsigned long *) data);
......@@ -268,8 +268,8 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
ret = -EIO;
if ((addr & 3) || addr < 0 ||
addr > sizeof(struct user) - 3)
if ((addr & 7) || addr < 0 ||
addr > sizeof(struct user) - 7)
break;
if (addr < sizeof(struct user_regs_struct)) {
......@@ -290,6 +290,11 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
if(addr < (long) &dummy->u_debugreg[4] &&
((unsigned long) data) >= TASK_SIZE-3) break;
if (addr == (long) &dummy->u_debugreg[6]) {
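/* bits 32-63 of DR6 are reserved on x86-64; refuse to set them */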
if (data >> 32)
goto out_tsk;
}
if(addr == (long) &dummy->u_debugreg[7]) {
data &= ~DR_CONTROL_RESERVED;
for(i=0; i<4; i++)
......@@ -298,7 +303,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
}
addr -= (long) &dummy->u_debugreg;
addr = addr >> 2;
addr = addr >> 3;
child->thread.debugreg[addr] = data;
ret = 0;
}
......
......@@ -101,7 +101,6 @@ void machine_restart(char * __unused)
* Stop all CPUs and turn off local APICs and the IO-APIC, so
* other OSs see a clean IRQ state.
*/
if (notify_die(DIE_STOP,"cpustop",0,0) != NOTIFY_BAD)
smp_send_stop();
disable_IO_APIC();
#endif
......
......@@ -197,12 +197,15 @@ static __init void parse_cmdline_early (char ** cmdline_p)
if (!memcmp(from, "acpi=off", 8))
acpi_disabled = 1;
if (!memcmp(from, "disableapic", 11))
disable_apic = 1;
if (!memcmp(from, "mem=", 4))
parse_memopt(from+4, &from);
#ifdef CONFIG_GART_IOMMU
if (!memcmp(from,"iommu=",6)) {
iommu_setup(from+6, &from);
iommu_setup(from+6);
}
#endif
......
......@@ -40,7 +40,7 @@ struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
char boot_cpu_stack[IRQSTACKSIZE] __cacheline_aligned;
unsigned long __supported_pte_mask = ~0UL;
static int do_not_nx = 0;
static int do_not_nx = 1;
static int __init nonx_setup(char *str)
{
......@@ -98,6 +98,8 @@ void pda_init(int cpu)
pda->cpudata_offset = 0;
pda->kernelstack =
(unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE;
pda->active_mm = &init_mm;
pda->mmu_state = 0;
if (cpu == 0) {
/* others are initialized in smpboot.c */
......@@ -121,8 +123,6 @@ void pda_init(int cpu)
asm volatile("movq %0,%%cr3" :: "r" (__pa(level4)));
pda->irqstackptr += IRQSTACKSIZE-64;
pda->active_mm = &init_mm;
pda->mmu_state = 0;
}
#define EXCEPTION_STK_ORDER 0 /* >= N_EXCEPTION_STACKS*EXCEPTION_STKSZ */
......
......@@ -225,6 +225,7 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
rsp = regs->rsp - 128;
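/* the 128 bytes skipped are the x86-64 ABI red zone below rsp */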
/* This is the X/Open sanctioned signal stack switching. */
/* RED-PEN: redzone on that stack? */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (sas_ss_flags(rsp) == 0)
rsp = current->sas_ss_sp + current->sas_ss_size;
......@@ -433,7 +434,8 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
* have been cleared if the watchpoint triggered
* inside the kernel.
*/
__asm__("movq %0,%%db7" : : "r" (current->thread.debugreg[7]));
if (current->thread.debugreg[7])
asm volatile("movq %0,%%db7" : : "r" (current->thread.debugreg[7]));
/* Whee! Actually deliver the signal. */
handle_signal(signr, &info, oldset, regs);
......
......@@ -485,16 +485,16 @@ asmlinkage void smp_call_function_interrupt(void)
}
}
/* Slow. Should be only used for debugging. */
int slow_smp_processor_id(void)
{
int stack_location;
unsigned long sp = (unsigned long)&stack_location;
int cpu;
unsigned long mask;
int offset = 0, cpu;
for (offset = 0; (cpu_online_map >> offset); offset = cpu + 1) {
cpu = ffz(~(cpu_online_map >> offset));
for_each_cpu(cpu, mask) {
if (sp >= (u64)cpu_pda[cpu].irqstackptr - IRQSTACKSIZE &&
sp <= (u64)cpu_pda[cpu].irqstackptr)
return cpu;
......
......@@ -318,8 +318,6 @@ void __init smp_callin(void)
*/
smp_store_cpu_info(cpuid);
notify_die(DIE_CPUINIT, "cpuinit", NULL, 0);
local_irq_disable();
/*
......@@ -898,6 +896,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
*/
if (!skip_ioapic_setup && nr_ioapics)
setup_IO_APIC();
else
nr_ioapics = 0;
setup_boot_APIC_clock();
......
......@@ -28,7 +28,7 @@
#include <asm/io.h>
#include <asm/proto.h>
static struct saved_context saved_context;
struct saved_context saved_context;
unsigned long saved_context_eax, saved_context_ebx, saved_context_ecx, saved_context_edx;
unsigned long saved_context_esp, saved_context_ebp, saved_context_esi, saved_context_edi;
......
......@@ -110,7 +110,7 @@ int printk_address(unsigned long address)
}
#endif
static inline unsigned long *in_exception_stack(int cpu, unsigned long stack)
unsigned long *in_exception_stack(int cpu, unsigned long stack)
{
int k;
for (k = 0; k < N_EXCEPTION_STACKS; k++) {
......@@ -249,7 +249,7 @@ void show_stack(unsigned long * rsp)
void dump_stack(void)
{
unsigned long dummy;
show_stack(&dummy);
show_trace(&dummy);
}
void show_registers(struct pt_regs *regs)
......@@ -344,7 +344,6 @@ void die(const char * str, struct pt_regs * regs, long err)
show_registers(regs);
bust_spinlocks(0);
spin_unlock_irq(&die_lock);
notify_die(DIE_OOPS, (char *)str, regs, err);
do_exit(SIGSEGV);
}
......@@ -419,6 +418,8 @@ static void do_trap(int trapnr, int signr, char *str,
#define DO_ERROR(trapnr, signr, str, name) \
asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
{ \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) == NOTIFY_BAD) \
return; \
do_trap(trapnr, signr, str, regs, error_code, NULL); \
}
......@@ -430,10 +431,13 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
info.si_errno = 0; \
info.si_code = sicode; \
info.si_addr = (void *)siaddr; \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) == NOTIFY_BAD) \
return; \
do_trap(trapnr, signr, str, regs, error_code, &info); \
}
DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip)
DO_ERROR( 3, SIGTRAP, "int3", int3);
DO_ERROR( 4, SIGSEGV, "overflow", overflow)
DO_ERROR( 5, SIGSEGV, "bounds", bounds)
DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->rip)
......@@ -446,13 +450,6 @@ DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, get_cr2())
DO_ERROR(18, SIGSEGV, "reserved", reserved)
asmlinkage void do_int3(struct pt_regs * regs, long error_code)
{
if (notify_die(DIE_INT3, "int3", regs, error_code) == NOTIFY_BAD)
return;
do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
}
extern void dump_pagetable(unsigned long);
asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
......@@ -493,6 +490,8 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
regs->rip = fixup->fixup;
return;
}
notify_die(DIE_GPF, "general protection fault", regs, error_code,
13, SIGSEGV);
die("general protection fault", regs, error_code);
}
}
......@@ -537,14 +536,14 @@ asmlinkage void default_do_nmi(struct pt_regs * regs)
* so it must be the NMI watchdog.
*/
if (nmi_watchdog) {
nmi_watchdog_tick(regs);
nmi_watchdog_tick(regs,reason);
return;
}
#endif
unknown_nmi_error(reason, regs);
return;
}
if (notify_die(DIE_NMI, "nmi", regs, reason) == NOTIFY_BAD)
if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_BAD)
return;
if (reason & 0x80)
mem_parity_error(reason, regs);
......@@ -569,6 +568,7 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code)
#ifdef CONFIG_CHECKING
{
/* RED-PEN interaction with debugger - could destroy gs */
unsigned long gs;
struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
rdmsrl(MSR_GS_BASE, gs);
......@@ -583,9 +583,6 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code)
conditional_sti(regs);
if (notify_die(DIE_DEBUG, "debug", regs, error_code) == NOTIFY_BAD)
return;
/* Mask out spurious debug traps due to lazy DR7 setting */
if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
if (!tsk->thread.debugreg[7]) {
......@@ -618,17 +615,22 @@ asmlinkage void do_debug(struct pt_regs * regs, long error_code)
info.si_signo = SIGTRAP;
info.si_errno = 0;
info.si_code = TRAP_BRKPT;
info.si_addr = ((regs->cs & 3) == 0) ? (void *)tsk->thread.rip :
(void *)regs->rip;
if ((regs->cs & 3) == 0)
goto clear_dr7;
info.si_addr = (void *)regs->rip;
force_sig_info(SIGTRAP, &info, tsk);
clear_dr7:
asm("movq %0,%%db7"::"r"(0UL));
asm volatile("movq %0,%%db7"::"r"(0UL));
notify_die(DIE_DEBUG, "debug", regs, error_code, 1, SIGTRAP);
return;
clear_TF_reenable:
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
clear_TF:
/* RED-PEN could cause spurious errors */
if (notify_die(DIE_DEBUG, "debug2", regs, error_code, 1, SIGTRAP) != NOTIFY_BAD)
regs->eflags &= ~TF_MASK;
return;
}
......@@ -775,9 +777,9 @@ asmlinkage void math_state_restore(void)
clts(); /* Allow maths ops (or we recurse) */
if (!me->used_math)
init_fpu();
init_fpu(me);
restore_fpu_checking(&me->thread.i387.fxsave);
set_thread_flag(TIF_USEDFPU);
me->thread_info->status |= TS_USEDFPU;
}
asmlinkage void math_emulate(void)
......@@ -787,7 +789,7 @@ asmlinkage void math_emulate(void)
void do_call_debug(struct pt_regs *regs)
{
notify_die(DIE_CALL, "debug call", regs, 0);
notify_die(DIE_CALL, "debug call", regs, 0, 255, SIGINT);
}
void __init trap_init(void)
......@@ -819,8 +821,6 @@ void __init trap_init(void)
set_intr_gate(KDB_VECTOR, call_debug);
notify_die(DIE_TRAPINIT, "traps initialized", 0, 0);
/*
* Should be a barrier for any external CPU state.
*/
......
......@@ -57,29 +57,41 @@ void bust_spinlocks(int yes)
}
}
static int bad_address(void *p)
{
unsigned long dummy;
return __get_user(dummy, (unsigned long *)p);
}
void dump_pagetable(unsigned long address)
{
static char *name[] = { "PML4", "PGD", "PDE", "PTE" };
int i, shift;
unsigned long page;
shift = 9+9+9+12;
address &= ~0xFFFF000000000000UL;
asm("movq %%cr3,%0" : "=r" (page));
for (i = 0; i < 4; i++) {
unsigned long *padr = (unsigned long *) __va(page);
padr += (address >> shift) & 0x1FFU;
if (__get_user(page, padr)) {
printk("%s: bad %p\n", name[i], padr);
break;
}
printk("%s: %016lx ", name[i], page);
if ((page & (1 | (1<<7))) != 1) /* Not present or 2MB page */
break;
page &= ~0xFFFUL;
shift -= (i == 0) ? 12 : 9;
}
pml4_t *pml4;
asm("movq %%cr3,%0" : "=r" (pml4));
pml4 = __va((unsigned long)pml4 & PHYSICAL_PAGE_MASK);
pml4 += pml4_index(address);
printk("PML4 %lx ", pml4_val(*pml4));
if (bad_address(pml4)) goto bad;
if (!pml4_present(*pml4)) goto ret;
pgd_t *pgd = __pgd_offset_k((pgd_t *)pml4_page(*pml4), address);
if (bad_address(pgd)) goto bad;
printk("PGD %lx ", pgd_val(*pgd));
if (!pgd_present(*pgd)) goto ret;
pmd_t *pmd = pmd_offset(pgd, address);
if (bad_address(pmd)) goto bad;
printk("PMD %lx ", pmd_val(*pmd));
if (!pmd_present(*pmd)) goto ret;
pte_t *pte = pte_offset_kernel(pmd, address);
if (bad_address(pte)) goto bad;
printk("PTE %lx", pte_val(*pte));
ret:
printk("\n");
return;
bad:
printk("BAD\n");
}
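The rewrite walks one explicit level at a time and stops at the first unreadable ("BAD") or non-present entry, so a fault report now reads roughly like this (entry values illustrative only):

	PML4 63 PGD 1a063 PMD 7e063 PTE 8000000000123163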
int page_fault_trace;
......
......@@ -150,7 +150,7 @@ void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flag
*/
offset = phys_addr & ~PAGE_MASK;
phys_addr &= PAGE_MASK;
size = PAGE_ALIGN(last_addr) - phys_addr;
size = PAGE_ALIGN(last_addr+1) - phys_addr;
/*
* Ok, go for it..
......
......@@ -21,7 +21,7 @@
#include <asm/pci-direct.h>
#include <asm/numa.h>
static int find_northbridge(void)
static __init int find_northbridge(void)
{
int num;
......@@ -45,7 +45,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
{
unsigned long prevbase;
struct node nodes[MAXNODE];
int nodeid, numnodes, maxnode, i, nb;
int nodeid, i, nb;
int found = 0;
nb = find_northbridge();
if (nb < 0)
......@@ -53,12 +54,13 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb);
numnodes = (read_pci_config(0, nb, 0, 0x60 ) >> 4) & 3;
numnodes = (1 << ((read_pci_config(0, nb, 0, 0x60 ) >> 4) & 3));
printk(KERN_INFO "Assuming %d nodes\n", numnodes - 1);
memset(&nodes,0,sizeof(nodes));
prevbase = 0;
maxnode = -1;
for (i = 0; i < MAXNODE; i++) {
for (i = 0; i < numnodes; i++) {
unsigned long base,limit;
base = read_pci_config(0, nb, 1, 0x40 + i*8);
......@@ -66,18 +68,16 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
nodeid = limit & 3;
if (!limit) {
printk(KERN_INFO "Skipping node entry %d (base %lx)\n", i, base);
continue;
printk(KERN_ERR "Skipping node entry %d (base %lx)\n", i, base);
return -1;
}
if ((base >> 8) & 3 || (limit >> 8) & 3) {
printk(KERN_ERR "Node %d using interleaving mode %lx/%lx\n",
nodeid, (base>>8)&3, (limit>>8) & 3);
return -1;
}
if (nodeid > maxnode)
maxnode = nodeid;
if ((1UL << nodeid) & nodes_present) {
printk("Node %d already present. Skipping\n", nodeid);
printk(KERN_INFO "Node %d already present. Skipping\n", nodeid);
continue;
}
......@@ -98,17 +98,19 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
base = start;
if (limit > end)
limit = end;
if (limit == base)
if (limit == base) {
printk(KERN_ERR "Empty node %d\n", nodeid);
continue;
}
if (limit < base) {
printk(KERN_INFO"Node %d bogus settings %lx-%lx. Ignored.\n",
printk(KERN_ERR "Node %d bogus settings %lx-%lx.\n",
nodeid, base, limit);
continue;
return -1;
}
/* Could sort here, but punt for now. Should not happen anyway. */
if (prevbase > base) {
printk(KERN_INFO "Node map not sorted %lx,%lx\n",
printk(KERN_ERR "Node map not sorted %lx,%lx\n",
prevbase,base);
return -1;
}
......@@ -116,23 +118,26 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
printk(KERN_INFO "Node %d MemBase %016lx Limit %016lx\n",
nodeid, base, limit);
found++;
nodes[nodeid].start = base;
nodes[nodeid].end = limit;
prevbase = base;
}
if (maxnode <= 0)
if (!found)
return -1;
memnode_shift = compute_hash_shift(nodes,maxnode,end);
memnode_shift = compute_hash_shift(nodes);
if (memnode_shift < 0) {
printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n");
return -1;
}
printk(KERN_INFO "Using node hash shift of %d\n", memnode_shift);
early_for_all_nodes(i) {
for (i = 0; i < numnodes; i++) {
if (nodes[i].start != nodes[i].end)
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
......
......@@ -26,11 +26,10 @@ u8 memnodemap[NODEMAPSIZE];
static int numa_off __initdata;
unsigned long nodes_present;
int maxnode;
static int emunodes __initdata;
int compute_hash_shift(struct node *nodes, int numnodes, u64 maxmem)
int __init compute_hash_shift(struct node *nodes)
{
int i;
int shift = 24;
......@@ -39,12 +38,16 @@ int compute_hash_shift(struct node *nodes, int numnodes, u64 maxmem)
/* When in doubt use brute force. */
while (shift < 48) {
memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE);
early_for_all_nodes (i) {
for (i = 0; i < numnodes; i++) {
if (nodes[i].start == nodes[i].end)
continue;
for (addr = nodes[i].start;
addr < nodes[i].end;
addr += (1UL << shift)) {
if (memnodemap[addr >> shift] != 0xff) {
printk("node %d shift %d addr %Lx conflict %d\n",
if (memnodemap[addr >> shift] != 0xff &&
memnodemap[addr >> shift] != i) {
printk(KERN_INFO
"node %d shift %d addr %Lx conflict %d\n",
i, shift, addr, memnodemap[addr>>shift]);
goto next;
}
......@@ -101,9 +104,8 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, bootmap_pages<<PAGE_SHIFT);
if (nodeid > maxnode)
maxnode = nodeid;
if (nodeid + 1 > numnodes)
numnodes = nodeid + 1;
nodes_present |= (1UL << nodeid);
}
......@@ -151,6 +153,7 @@ int __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
int i;
if (emunodes > MAXNODE)
emunodes = MAXNODE;
memset(&nodes, 0, sizeof(nodes));
printk(KERN_INFO "Faking %d nodes of size %ld MB\n", emunodes, nodesize>>20);
for (i = 0; i < emunodes; i++) {
unsigned long end = (i+1)*nodesize;
......@@ -160,7 +163,7 @@ int __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
nodes[i].end = end;
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
memnode_shift = compute_hash_shift(nodes, emunodes, nodes[i-1].end);
memnode_shift = compute_hash_shift(nodes);
return 0;
}
......
......@@ -618,11 +618,20 @@ void pcibios_penalize_isa_irq(int irq)
int pirq_enable_irq(struct pci_dev *dev)
{
u8 pin;
extern int interrupt_line_quirk;
pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
/* With IDE legacy devices the IRQ lookup failure is not a problem.. */
if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
return 0;
printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.\n",
'A' + pin - 1, dev->slot_name);
}
/* VIA bridges use interrupt line for apic/pci steering across
the V-Link */
else if (interrupt_line_quirk)
pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
return 0;
}
......@@ -75,7 +75,7 @@ extern void smp_local_timer_interrupt (struct pt_regs * regs);
extern void setup_boot_APIC_clock (void);
extern void setup_secondary_APIC_clock (void);
extern void setup_apic_nmi_watchdog (void);
extern inline void nmi_watchdog_tick (struct pt_regs * regs);
extern void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason);
extern int APIC_init_uniprocessor (void);
extern void disable_APIC_timer(void);
extern void enable_APIC_timer(void);
......
......@@ -58,7 +58,7 @@
We can slow the instruction pipeline for instructions coming via the
gdt or the ldt if we want to. I am not sure why this is an advantage.
#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
#define DR_CONTROL_RESERVED (0xFFFFFFFFFC00) /* Reserved by Intel */
#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
......
......@@ -8,6 +8,7 @@
#ifndef __ASSEMBLY__
#include <asm/segment.h>
#include <asm/mmu.h>
// 8 byte segment descriptor
struct desc_struct {
......
......@@ -7,6 +7,4 @@
#ifndef __ASMx86_64_HDREG_H
#define __ASMx86_64_HDREG_H
typedef unsigned long ide_ioreg_t;
#endif /* __ASMx86_64_HDREG_H */
......@@ -19,15 +19,15 @@
#include <asm/thread_info.h>
extern void fpu_init(void);
extern void init_fpu(void);
int save_i387(struct _fpstate *buf);
extern void init_fpu(struct task_struct *child);
extern int save_i387(struct _fpstate *buf);
static inline int need_signal_i387(struct task_struct *me)
{
if (!me->used_math)
return 0;
me->used_math = 0;
if (me->thread_info->flags & _TIF_USEDFPU)
if (me->thread_info->status & TS_USEDFPU)
return 0;
return 1;
}
......@@ -39,14 +39,14 @@ static inline int need_signal_i387(struct task_struct *me)
#define kernel_fpu_end() stts()
#define unlazy_fpu(tsk) do { \
if ((tsk)->thread_info->flags & _TIF_USEDFPU) \
if ((tsk)->thread_info->status & TS_USEDFPU) \
save_init_fpu(tsk); \
} while (0)
#define clear_fpu(tsk) do { \
if ((tsk)->thread_info->flags & _TIF_USEDFPU) { \
if ((tsk)->thread_info->status & TS_USEDFPU) { \
asm volatile("fwait"); \
(tsk)->thread_info->flags &= ~_TIF_USEDFPU; \
(tsk)->thread_info->status &= ~TS_USEDFPU; \
stts(); \
} \
} while (0)
......@@ -114,11 +114,11 @@ static inline int save_i387_checking(struct i387_fxsave_struct *fx)
static inline void kernel_fpu_begin(void)
{
struct task_struct *me = current;
if (test_tsk_thread_flag(me,TIF_USEDFPU)) {
asm volatile("fxsave %0 ; fnclex"
: "=m" (me->thread.i387.fxsave));
clear_tsk_thread_flag(me, TIF_USEDFPU);
struct thread_info *me = current_thread_info();
if (me->status & TS_USEDFPU) {
asm volatile("rex64 ; fxsave %0 ; fnclex"
: "=m" (me->task->thread.i387.fxsave));
me->status &= ~TS_USEDFPU;
return;
}
clts();
......@@ -128,7 +128,7 @@ static inline void save_init_fpu( struct task_struct *tsk )
{
asm volatile( "fxsave %0 ; fnclex"
: "=m" (tsk->thread.i387.fxsave));
tsk->thread_info->flags &= ~TIF_USEDFPU;
tsk->thread_info->status &= ~TS_USEDFPU;
stts();
}
......@@ -140,18 +140,4 @@ static inline int restore_i387(struct _fpstate *buf)
return restore_fpu_checking((struct i387_fxsave_struct *)buf);
}
static inline void empty_fpu(struct task_struct *child)
{
if (!child->used_math) {
/* Simulate an empty FPU. */
memset(&child->thread.i387.fxsave,0,sizeof(struct i387_fxsave_struct));
child->thread.i387.fxsave.cwd = 0x037f;
child->thread.i387.fxsave.swd = 0;
child->thread.i387.fxsave.twd = 0;
child->thread.i387.fxsave.mxcsr = 0x1f80;
}
child->used_math = 1;
}
#endif /* __ASM_X86_64_I387_H */
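With lazy FPU state now tracked via thread_info->status, kernel code that uses the FPU/SSE registers still brackets the usage the same way. A sketch, where my_sse_copy() stands in for a hypothetical vectorized routine:

	static void copy_with_sse(void *dst, const void *src, size_t len)
	{
		kernel_fpu_begin();	/* saves user FPU state if TS_USEDFPU is set */
		my_sse_copy(dst, src, len);
		kernel_fpu_end();	/* stts(): next user FPU use faults and restores */
	}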
......@@ -9,8 +9,13 @@ struct die_args {
struct pt_regs *regs;
const char *str;
long err;
int trapnr;
int signr;
};
/* Note: you should never unregister, because that can race with NMIs.
   If you really must, first unregister, then synchronize_kernel(), then free.
 */
extern struct notifier_block *die_chain;
/* Grossly misnamed. */
......@@ -21,15 +26,16 @@ enum die_val {
DIE_PANIC,
DIE_NMI,
DIE_DIE,
DIE_NMIWATCHDOG,
DIE_KERNELDEBUG,
DIE_TRAP,
DIE_GPF,
DIE_CALL,
DIE_CPUINIT, /* not really a die, but .. */
DIE_TRAPINIT, /* not really a die, but .. */
DIE_STOP,
};
static inline int notify_die(enum die_val val,char *str,struct pt_regs *regs,long err)
static inline int notify_die(enum die_val val,char *str,struct pt_regs *regs,long err,int trap, int sig)
{
struct die_args args = { regs: regs, str: str, err: err };
struct die_args args = { .regs=regs, .str=str, .err=err, .trapnr=trap,.signr=sig };
return notifier_call_chain(&die_chain, val, &args);
}
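A hypothetical debugger hook would register on the chain like this (illustrative only; per the note above it should never unregister without synchronize_kernel()):

	static int my_die_event(struct notifier_block *self, unsigned long val,
				void *data)
	{
		struct die_args *args = data;
		if (val == DIE_GPF)
			printk(KERN_ERR "GPF at %lx err %ld signal %d\n",
			       args->regs->rip, args->err, args->signr);
		return NOTIFY_OK;
	}
	static struct notifier_block my_die_nb = { .notifier_call = my_die_event };
	/* registration: notifier_chain_register(&die_chain, &my_die_nb); */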
......
......@@ -8,13 +8,11 @@ struct node {
u64 start,end;
};
#define for_all_nodes(x) for ((x) = 0; (x) <= maxnode; (x)++) \
#define for_all_nodes(x) for ((x) = 0; (x) < numnodes; (x)++) \
if ((1UL << (x)) & nodes_present)
#define early_for_all_nodes(n) \
for (n=0; n<MAXNODE;n++) if (nodes[n].start!=nodes[n].end)
extern int compute_hash_shift(struct node *nodes, int numnodes, u64 maxmem);
extern int compute_hash_shift(struct node *nodes);
extern unsigned long nodes_present;
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
......
......@@ -44,8 +44,7 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
struct pci_dev;
extern int iommu_setup(char *opt, char **end);
extern int iommu_setup(char *opt);
extern void pci_iommu_init(void);
/* Allocate and map kernel buffer using consistent mode DMA for a device.
......@@ -77,10 +76,11 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
* Once the device is given the dma address, the device owns this memory
* until either pci_unmap_single or pci_dma_sync_single is performed.
*/
extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
size_t size, int direction);
extern dma_addr_t __pci_map_single(struct pci_dev *hwdev, void *ptr,
size_t size, int direction, int flush);
extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t addr,
void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t addr,
size_t size, int direction);
/*
......@@ -118,12 +118,16 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
BUG_ON(direction == PCI_DMA_NONE);
}
#define PCI_DMA_BUS_IS_PHYS 0
/* Whether the PCI address space equals the physical memory address
 * space. The networking and block device layers use this boolean
 * for bounce buffer decisions; with the GART IOMMU it does not.
 */
#define PCI_DMA_BUS_IS_PHYS (0)
#else
static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
size_t size, int direction)
static inline dma_addr_t __pci_map_single(struct pci_dev *hwdev, void *ptr,
size_t size, int direction, int flush)
{
dma_addr_t addr;
......@@ -210,6 +214,11 @@ extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
int nents, int direction);
static inline dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr,
size_t size, int direction)
{
return __pci_map_single(hwdev,ptr,size,direction,1);
}
#define pci_unmap_page pci_unmap_single
......
......@@ -283,6 +283,7 @@ static inline int pmd_large(pmd_t pte) {
#define pml4_page(pml4) ((unsigned long) __va(pml4_val(pml4) & PTE_MASK))
#define pml4_index(address) ((address >> PML4_SHIFT) & (PTRS_PER_PML4-1))
#define pml4_offset_k(address) (init_level4_pgt + pml4_index(address))
#define pml4_present(pml4) (pml4_val(pml4) & _PAGE_PRESENT)
#define mk_kernel_pml4(address) ((pml4_t){ (address) | _KERNPG_TABLE })
#define level3_offset_k(dir, address) ((pgd_t *) pml4_page(*(dir)) + pgd_index(address))
......
......@@ -269,7 +269,7 @@ struct mm_struct;
extern void release_thread(struct task_struct *);
/* Prepare to copy thread state - unlazy all lazy status */
#define prepare_to_copy(tsk) do { } while (0)
extern void prepare_to_copy(struct task_struct *tsk);
/*
* create a kernel thread without removing it from tasklists
......@@ -308,8 +308,8 @@ extern inline void sync_core(void)
#define ARCH_HAS_PREFETCHW
#define ARCH_HAS_SPINLOCK_PREFETCH
#define prefetch(x) __builtin_prefetch((x),0)
#define prefetchw(x) __builtin_prefetch((x),1)
#define prefetch(x) __builtin_prefetch((x),0,1)
#define prefetchw(x) __builtin_prefetch((x),1,1)
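/* the added third argument is __builtin_prefetch's temporal locality hint (0 none .. 3 high) */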
#define spin_lock_prefetch(x) prefetchw(x)
#define cpu_relax() rep_nop()
......
......@@ -6,6 +6,7 @@
/* misc architecture specific prototypes */
struct cpuinfo_x86;
struct pt_regs;
extern void get_cpu_vendor(struct cpuinfo_x86*);
extern void start_kernel(void);
......@@ -41,6 +42,8 @@ extern void free_bootmem_generic(unsigned long phys, unsigned len);
extern unsigned long end_pfn_map;
extern void show_stack(unsigned long * rsp);
extern void show_trace(unsigned long * rsp);
extern void show_registers(struct pt_regs *regs);
extern void exception_table_check(void);
......
......@@ -11,7 +11,7 @@ arch_prepare_suspend(void)
{
}
/* image of the saved processor state */
/* Image of the saved processor state. If you touch this, fix acpi_wakeup.S. */
struct saved_context {
u16 ds, es, fs, gs, ss;
unsigned long gs_base, gs_kernel_base, fs_base;
......
......@@ -22,18 +22,18 @@
struct save_context_frame {
unsigned long rbp;
unsigned long rbx;
unsigned long r11;
unsigned long r10;
unsigned long r9;
unsigned long r8;
unsigned long rcx;
unsigned long rdx;
unsigned long rsi;
unsigned long rdi;
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long r11;
unsigned long r10;
unsigned long r9;
unsigned long r8;
unsigned long rdi;
unsigned long rsi;
};
/* frame pointer must be last for get_wchan */
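/* Note: the member order above must stay the exact reverse of the SAVE_CONTEXT push order below. */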
......@@ -43,19 +43,20 @@ struct save_context_frame {
rbp always needs to be saved explicitly because gcc cannot clobber the
frame pointer and the scheduler is compiled with frame pointers. -AK */
#define SAVE_CONTEXT \
__PUSH(r8) __PUSH(r9) __PUSH(r10) __PUSH(r11) __PUSH(r12) __PUSH(r13) \
__PUSH(r14) __PUSH(r15) \
__PUSH(rdi) __PUSH(rsi) \
__PUSH(rdx) __PUSH(rcx) \
__PUSH(rsi) __PUSH(rdi) \
__PUSH(r12) __PUSH(r13) __PUSH(r14) __PUSH(r15) \
__PUSH(rdx) __PUSH(rcx) __PUSH(r8) __PUSH(r9) __PUSH(r10) __PUSH(r11) \
__PUSH(rbx) __PUSH(rbp)
#define RESTORE_CONTEXT \
__POP(rbp) __POP(rbx) \
__POP(rcx) __POP(rdx) \
__POP(rsi) __POP(rdi) \
__POP(r15) __POP(r14) __POP(r13) __POP(r12) __POP(r11) __POP(r10) \
__POP(r9) __POP(r8)
__POP(r11) __POP(r10) __POP(r9) __POP(r8) __POP(rcx) __POP(rdx) \
__POP(r15) __POP(r14) __POP(r13) __POP(r12) \
__POP(rdi) __POP(rsi)
/* RED-PEN: pipeline stall on ret because it is not predicted */
/* RED-PEN: the register saving could be optimized */
/* frame pointer must be last for get_wchan */
#define switch_to(prev,next,last) \
asm volatile(SAVE_CONTEXT \
"movq %%rsp,%[prevrsp]\n\t" \
......
......@@ -27,6 +27,7 @@ struct thread_info {
struct task_struct *task; /* main task structure */
struct exec_domain *exec_domain; /* execution domain */
__u32 flags; /* low level flags */
__u32 status; /* thread synchronous flags */
__u32 cpu; /* current CPU */
int preempt_count;
......@@ -100,16 +101,14 @@ static inline struct thread_info *stack_thread_info(void)
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
#define TIF_USEDFPU 16 /* FPU was used by this task this quantum */
#define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_IA32 18 /* 32bit process */
#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_IA32 17 /* 32bit process */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_USEDFPU (1<<TIF_USEDFPU)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_IA32 (1<<TIF_IA32)
......@@ -118,6 +117,15 @@ static inline struct thread_info *stack_thread_info(void)
#define PREEMPT_ACTIVE 0x4000000
/*
* Thread-synchronous status.
*
* This is different from the flags in that nobody else
* ever touches our thread-synchronous status, so we don't
* have to worry about atomic accesses.
*/
#define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */
#endif /* __KERNEL__ */
#endif /* _ASM_THREAD_INFO_H */
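Since only the owning task writes ->status, it can be updated without atomic bitops, unlike ->flags; a two-line sketch of the contrast:

	current_thread_info()->status |= TS_USEDFPU;	/* plain read-modify-write is safe */
	set_thread_flag(TIF_SIGPENDING);		/* ->flags requires atomic bitops */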
......@@ -520,8 +520,10 @@ __SYSCALL(__NR_clock_gettime, sys_clock_gettime)
__SYSCALL(__NR_clock_getres, sys_clock_getres)
#define __NR_clock_nanosleep 230
__SYSCALL(__NR_clock_nanosleep, sys_clock_nanosleep)
#define __NR_exit_group 231
__SYSCALL(__NR_exit_group, sys_exit_group)
#define __NR_syscall_max __NR_clock_nanosleep
#define __NR_syscall_max __NR_exit_group
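A userspace smoke test for the new syscall might look like this (illustrative; exit_group terminates every thread of the calling process at once):

	#include <unistd.h>
	#include <sys/syscall.h>

	int main(void)
	{
		syscall(231 /* __NR_exit_group */, 0);	/* does not return */
		return 1;
	}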
#ifndef __NO_STUBS
/* user-visible error numbers are in the range -1 - -4095 */
......