Commit 3720aca6 authored by Andi Kleen, committed by Linus Torvalds

[PATCH] x86-64 update

Nothing too exciting, just some minor new features and bug fixes.
The IOMMU code should no longer BUG unless CONFIG_IOMMU_DEBUG/iommu=force is set.

 - Compiles now with -funit-at-a-time
 - Fix up ioctl 32bit tables following Andrew's change.
 - Fix __SI_POLL siginfo_t passing to user space
 - Add 32bit emulation for fadvise64_64
 - Remove unneeded sys32_utimes
 - Various merges with i386 (ACPI, APIC etc.)
 - Port cpuid patching infrastructure from i386
 - Use it to provide better copy_*_user/memcpy/memset/clear/copy_page for
   C stepping K8. String instructions are now faster than unrolled loops.
 - Don't try to merge unforced PCI mappings that don't exceed the device's
   DMA mask.
 - Discard .exit.data/.exit.text at runtime like i386
 - Don't use NTI stores for clear_user
 - Convert bitops.h bitmap functions to use unsigned long * pointers instead
   of void *
 - Fix some warnings in kernel headers.
 - Fix PDA comments
parent 49e4d78b
......@@ -41,15 +41,16 @@ CFLAGS += -mno-red-zone
CFLAGS += -mcmodel=kernel
CFLAGS += -pipe
# this makes reading assembly source easier, but produces worse code
# actually it makes the kernel smaller too.
CFLAGS += -fno-reorder-blocks
# should lower this a lot and see how much .text it saves
CFLAGS += -finline-limit=2000
CFLAGS += -Wno-sign-compare
#CFLAGS += -g
# don't enable this when you use kgdb:
ifneq ($(CONFIG_X86_REMOTE_DEBUG),y)
CFLAGS += -fno-asynchronous-unwind-tables
endif
#CFLAGS += -funit-at-a-time
head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o
......
......@@ -673,12 +673,10 @@ static int mtrr_ioctl32(unsigned int fd, unsigned int cmd, unsigned long arg)
return err;
}
#define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl_trans_handler_t)(handler), NULL },
#define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl_trans_handler_t)(handler) },
#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl)
#define IOCTL_TABLE_START struct ioctl_trans ioctl_start[] = {
#define IOCTL_TABLE_END };
IOCTL_TABLE_START
struct ioctl_trans ioctl_start[] = {
#include <linux/compat_ioctl.h>
#define DECLARES
#include "compat_ioctl.c"
......@@ -761,6 +759,7 @@ HANDLE_IOCTL(MTRRIOC32_SET_PAGE_ENTRY, mtrr_ioctl32)
HANDLE_IOCTL(MTRRIOC32_DEL_PAGE_ENTRY, mtrr_ioctl32)
HANDLE_IOCTL(MTRRIOC32_GET_PAGE_ENTRY, mtrr_ioctl32)
HANDLE_IOCTL(MTRRIOC32_KILL_PAGE_ENTRY, mtrr_ioctl32)
IOCTL_TABLE_END
};
int ioctl_table_size = ARRAY_SIZE(ioctl_start);
......@@ -80,6 +80,10 @@ static int ia32_copy_siginfo_to_user(siginfo_t32 *to, siginfo_t *from)
default:
err |= __put_user(from->si_uid, &to->si_uid);
break;
case __SI_POLL >> 16:
err |= __put_user(from->si_band, &to->si_band);
err |= __put_user(from->si_fd, &to->si_fd);
break;
/* case __SI_RT: This is not generated by the kernel as of now. */
}
return err;
......
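Reviewer note on the __SI_POLL case above: in 2.6-era <asm-generic/siginfo.h> the upper 16 bits of si_code carry the siginfo class tag (__SI_POLL is (2 << 16)), which is why the switch dispatches on si_code >> 16. A minimal standalone sketch of that encoding (the demo itself is illustrative, not from the patch):

#include <stdio.h>

#define __SI_POLL (2 << 16)	/* class tag, as in 2.6-era siginfo.h */

int main(void)
{
	int si_code = __SI_POLL | 1;	/* POLL_IN is defined as (__SI_POLL|1) */
	/* ia32_copy_siginfo_to_user switches on the class in the top 16 bits */
	printf("class=%d code=%d\n", si_code >> 16, si_code & 0xffff);
	return 0;			/* prints class=2 code=1 */
}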
......@@ -475,6 +475,7 @@ ia32_sys_call_table:
.quad compat_fstatfs64 /* fstatfs64 */
.quad sys_tgkill
.quad compat_sys_utimes
.quad sys32_fadvise64_64
/* don't forget to change IA32_NR_syscalls */
ia32_syscall_end:
.rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8
......
......@@ -1170,35 +1170,6 @@ sys32_rt_sigqueueinfo(int pid, int sig, siginfo_t32 *uinfo)
return ret;
}
asmlinkage long sys_utimes(char *, struct timeval *);
asmlinkage long
sys32_utimes(char *filename, struct compat_timeval *tvs)
{
char *kfilename;
struct timeval ktvs[2];
mm_segment_t old_fs;
int ret;
kfilename = getname(filename);
ret = PTR_ERR(kfilename);
if (!IS_ERR(kfilename)) {
if (tvs) {
if (get_tv32(&ktvs[0], tvs) ||
get_tv32(&ktvs[1], 1+tvs))
return -EFAULT;
}
old_fs = get_fs();
set_fs(KERNEL_DS);
ret = sys_utimes(kfilename, &ktvs[0]);
set_fs(old_fs);
putname(kfilename);
}
return ret;
}
/* These are here just in case some old ia32 binary calls them. */
asmlinkage long
sys32_pause(void)
......@@ -2027,6 +1998,17 @@ sys32_timer_create(u32 clock, struct sigevent32 *se32, timer_t *timer_id)
return err;
}
extern long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice);
long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
__u32 len_low, __u32 len_high, int advice)
{
return sys_fadvise64_64(fd,
(((u64)offset_high)<<32) | offset_low,
(((u64)len_high)<<32) | len_low,
advice);
}
long sys32_vm86_warning(void)
{
printk(KERN_INFO "%s: vm86 mode not supported on 64 bit kernel\n",
......
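Note: sys32_fadvise64_64 above reassembles each 64-bit loff_t from the two 32-bit halves that arrive in separate registers from 32-bit userspace. The same arithmetic as a standalone sketch (the helper name is illustrative):

#include <assert.h>
#include <stdint.h>

/* mirrors (((u64)high << 32) | low) in the wrapper above */
static uint64_t join_u64(uint32_t low, uint32_t high)
{
	return ((uint64_t)high << 32) | low;
}

int main(void)
{
	assert(join_u64(0x89abcdefu, 0x01234567u) == 0x0123456789abcdefULL);
	return 0;
}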
......@@ -47,6 +47,9 @@
#include <asm/tlbflush.h>
extern int acpi_disabled;
int acpi_lapic = 0;
int acpi_ioapic = 0;
extern int disable_apic;
#define PREFIX "ACPI: "
......@@ -76,8 +79,6 @@ __acpi_map_table (
#ifdef CONFIG_X86_LOCAL_APIC
int acpi_lapic;
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
......@@ -165,8 +166,6 @@ acpi_parse_lapic_nmi (
#ifdef CONFIG_X86_IO_APIC
int acpi_ioapic;
static int __init
acpi_parse_ioapic (
acpi_table_entry_header *header)
......@@ -292,12 +291,34 @@ acpi_find_rsdp (void)
return rsdp_phys;
}
/*
* acpi_boot_init()
* called from setup_arch(), always.
* 1. maps ACPI tables for later use
* 2. enumerates lapics
* 3. enumerates io-apics
*
* side effects:
* acpi_lapic = 1 if LAPIC found
* acpi_ioapic = 1 if IOAPIC found
* if (acpi_lapic && acpi_ioapic) smp_found_config = 1;
* if acpi_blacklisted() acpi_disabled = 1;
* acpi_irq_model=...
* ...
*
* return value: (currently ignored)
* 0: success
* !0: failure
*/
int __init
acpi_boot_init (void)
{
int result = 0;
if (acpi_disabled)
return 1;
/*
* The default interrupt routing model is PIC (8259). This gets
* overridden if IOAPICs are enumerated (below).
......@@ -316,9 +337,7 @@ acpi_boot_init (void)
printk(KERN_WARNING PREFIX "BIOS listed in blacklist, disabling ACPI support\n");
acpi_disabled = 1;
return result;
} else
printk(KERN_NOTICE PREFIX "BIOS not listed in blacklist\n");
}
extern int disable_apic;
if (disable_apic)
......@@ -391,6 +410,25 @@ acpi_boot_init (void)
* --------
*/
/*
* ACPI interpreter is required to complete interrupt setup,
* so if it is off, don't enumerate the io-apics with ACPI.
* If MPS is present, it will handle them,
* otherwise the system will stay in PIC mode
*/
if (acpi_disabled) {
return 1;
}
/*
* if "noapic" boot option, don't look for IO-APICs
*/
if (disable_apic) {
printk(KERN_INFO PREFIX "Skipping IOAPIC probe "
"due to 'noapic' option.\n");
return 1;
}
result = acpi_table_parse_madt(ACPI_MADT_IOAPIC, acpi_parse_ioapic);
if (!result) {
printk(KERN_ERR PREFIX "No IOAPIC entries present\n");
......
......@@ -38,8 +38,6 @@ int disable_apic_timer __initdata;
/* Using APIC to generate smp_local_timer_interrupt? */
int using_apic_timer = 0;
int dont_enable_local_apic __initdata = 0;
static DEFINE_PER_CPU(int, prof_multiplier) = 1;
static DEFINE_PER_CPU(int, prof_old_multiplier) = 1;
static DEFINE_PER_CPU(int, prof_counter) = 1;
......@@ -464,7 +462,6 @@ static struct {
static int lapic_suspend(struct sys_device *dev, u32 state)
{
unsigned int l, h;
unsigned long flags;
if (!apic_pm_state.active)
......@@ -486,9 +483,6 @@ static int lapic_suspend(struct sys_device *dev, u32 state)
local_save_flags(flags);
local_irq_disable();
disable_local_APIC();
rdmsr(MSR_IA32_APICBASE, l, h);
l &= ~MSR_IA32_APICBASE_ENABLE;
wrmsr(MSR_IA32_APICBASE, l, h);
local_irq_restore(flags);
return 0;
}
......@@ -1017,6 +1011,12 @@ static __init int setup_disableapic(char *str)
return 0;
}
static __init int setup_nolapic(char *str)
{
disable_apic = 1;
return 0;
}
static __init int setup_noapictimer(char *str)
{
disable_apic_timer = 1;
......@@ -1024,5 +1024,7 @@ static __init int setup_noapictimer(char *str)
}
__setup("disableapic", setup_disableapic);
__setup("nolapic", setup_nolapic); /* same as disableapic, for compatibility */
__setup("noapictimer", setup_noapictimer);
/* no "lapic" flag - we only use the lapic when the BIOS tells us so. */
......@@ -222,10 +222,23 @@ int apply_relocate(Elf_Shdr *sechdrs,
return -ENOSYS;
}
extern void apply_alternatives(void *start, void *end);
int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
const Elf_Shdr *s;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
/* look for .altinstructions to patch */
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
void *seg;
if (strcmp(".altinstructions", secstrings + s->sh_name))
continue;
seg = (void *)s->sh_addr;
apply_alternatives(seg, seg + s->sh_size);
}
return 0;
}
......
......@@ -429,8 +429,6 @@ static inline int pci_map_cont(struct scatterlist *sg, int start, int stopat,
return __pci_map_cont(sg, start, stopat, sout, pages);
}
#define PCI_NO_MERGE 0
/*
* DMA map all entries in a scatterlist.
* Merge chunks that have page aligned sizes into a continuous mapping.
......@@ -463,7 +461,7 @@ int pci_map_sg(struct pci_dev *dev, struct scatterlist *sg, int nents, int dir)
struct scatterlist *ps = &sg[i-1];
/* Can only merge when the last chunk ends on a page
boundary. */
if (PCI_NO_MERGE || !need || (i-1 > start && ps->offset) ||
if (!force_iommu || !need || (i-1 > start && ps->offset) ||
(ps->offset + ps->length) % PAGE_SIZE) {
if (pci_map_cont(sg, start, i, sg+out, pages,
need) < 0)
......
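Note on the pci_map_sg change: the old PCI_NO_MERGE constant (0) always permitted merging; the new test only merges when the IOMMU is forced, matching the changelog entry about unforced mappings. A hypothetical C predicate restating the inverted condition, for readability only:

#include <stdbool.h>

#define PAGE_SIZE 4096UL

/* true when chunk i may be merged into the running mapping;
   all names here are illustrative, not from the patch */
static bool can_merge(bool force_iommu, bool need, bool prev_is_not_first,
		      unsigned long prev_offset, unsigned long prev_length)
{
	if (!force_iommu || !need)
		return false;	/* unforced mappings are no longer merged */
	if (prev_is_not_first && prev_offset)
		return false;	/* middle chunks must start at offset 0 */
	return (prev_offset + prev_length) % PAGE_SIZE == 0;
}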
......@@ -38,6 +38,7 @@
#include <linux/root_dev.h>
#include <linux/pci.h>
#include <linux/acpi.h>
#include <linux/kallsyms.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
#include <asm/system.h>
......@@ -197,6 +198,12 @@ static __init void parse_cmdline_early (char ** cmdline_p)
if (!memcmp(from, "acpi=off", 8))
acpi_disabled = 1;
if (!memcmp(from, "acpi=force", 10)) {
/* add later when we do DMI horrors: */
/* acpi_force = 1; */
acpi_disabled = 0;
}
if (!memcmp(from, "disableapic", 11))
disable_apic = 1;
......@@ -240,6 +247,71 @@ static void __init contig_initmem_init(void)
}
#endif
/* Use inline assembly to define this because the nops are defined
as inline assembly strings in the include files and we cannot
get them easily into strings. */
asm("\t.data\nk8nops: "
K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
K8_NOP7 K8_NOP8);
extern unsigned char k8nops[];
static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
NULL,
k8nops,
k8nops + 1,
k8nops + 1 + 2,
k8nops + 1 + 2 + 3,
k8nops + 1 + 2 + 3 + 4,
k8nops + 1 + 2 + 3 + 4 + 5,
k8nops + 1 + 2 + 3 + 4 + 5 + 6,
k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
};
/* Replace instructions with better alternatives for this CPU type.
   This runs before SMP is initialized to avoid SMP problems with
   self-modifying code. This implies that asymmetric systems where
   APs have fewer capabilities than the boot processor are not handled.
   In this case boot with "noreplacement". */
void apply_alternatives(void *start, void *end)
{
struct alt_instr *a;
int diff, i, k;
for (a = start; (void *)a < end; a++) {
if (!boot_cpu_has(a->cpuid))
continue;
BUG_ON(a->replacementlen > a->instrlen);
__inline_memcpy(a->instr, a->replacement, a->replacementlen);
diff = a->instrlen - a->replacementlen;
/* Pad the rest with nops */
for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
k = diff;
if (k > ASM_NOP_MAX)
k = ASM_NOP_MAX;
__inline_memcpy(a->instr + i, k8_nops[k], k);
}
}
}
static int no_replacement __initdata = 0;
void __init alternative_instructions(void)
{
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
if (no_replacement)
return;
apply_alternatives(__alt_instructions, __alt_instructions_end);
}
static int __init noreplacement_setup(char *s)
{
no_replacement = 1;
return 0;
}
__setup("noreplacement", noreplacement_setup);
void __init setup_arch(char **cmdline_p)
{
......@@ -382,7 +454,7 @@ void __init setup_arch(char **cmdline_p)
/* Will likely break when you have unassigned resources with more
than 4GB memory and bridges that don't support more than 4GB.
Doing it properly would require to allocate GFP_DMA memory
Doing it properly would require using pci_alloc_consistent
in this case. */
low_mem_size = ((end_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
if (low_mem_size > pci_mem_start)
......@@ -455,11 +527,17 @@ static void __init display_cacheinfo(struct cpuinfo_x86 *c)
static int __init init_amd(struct cpuinfo_x86 *c)
{
int r;
int level;
/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
clear_bit(0*32+31, &c->x86_capability);
/* C-stepping K8? */
level = cpuid_eax(1);
if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
set_bit(X86_FEATURE_K8_C, &c->x86_capability);
r = get_model_name(c);
if (!r) {
switch (c->x86) {
......
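Note on apply_alternatives above: the padding loop splits the instrlen - replacementlen leftover bytes into nops of at most ASM_NOP_MAX bytes (8, per the processor.h hunk below). A sketch of just that arithmetic with illustrative lengths:

#include <stdio.h>

#define ASM_NOP_MAX 8

int main(void)
{
	int instrlen = 23, replacementlen = 4;	/* illustrative */
	int diff, i, k;

	/* same loop shape as in apply_alternatives */
	for (i = replacementlen, diff = instrlen - replacementlen;
	     diff > 0; diff -= k, i += k) {
		k = diff > ASM_NOP_MAX ? ASM_NOP_MAX : diff;
		printf("emit %d-byte nop at offset %d\n", k, i);
	}
	return 0;	/* emits nops of 8, 8 and 3 bytes */
}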
......@@ -679,8 +679,8 @@ void math_error(void *rip)
default:
break;
case 0x001: /* Invalid Op */
case 0x040: /* Stack Fault */
case 0x240: /* Stack Fault | Direction */
case 0x041: /* Stack Fault */
case 0x241: /* Stack Fault | Direction */
info.si_code = FPE_FLTINV;
break;
case 0x002: /* Denormalize */
......
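Note on the math_error fix: in the x87 status word, a stack fault sets SF (bit 6, 0x040) together with IE (bit 0, 0x001), and C1 (bit 9, 0x200) gives the direction of the fault, hence the corrected case values 0x041 and 0x241. Illustrative defines for those bits (standard x87 layout, not from the patch):

/* x87 FPU status-word bits */
#define FSW_IE 0x001	/* invalid operation */
#define FSW_SF 0x040	/* stack fault */
#define FSW_C1 0x200	/* direction of a stack fault */

/* a stack fault reports FSW_IE|FSW_SF = 0x041,
   or FSW_IE|FSW_SF|FSW_C1 = 0x241, matching the corrected cases */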
......@@ -106,6 +106,15 @@ SECTIONS
.con_initcall.init : { *(.con_initcall.init) }
__con_initcall_end = .;
SECURITY_INIT
. = ALIGN(8);
__alt_instructions = .;
.altinstructions : { *(.altinstructions) }
__alt_instructions_end = .;
.altinstr_replacement : { *(.altinstr_replacement) }
/* .exit.text is discarded at runtime, not link time, to deal with references
   from .altinstructions and .eh_frame */
.exit.text : { *(.exit.text) }
.exit.data : { *(.exit.data) }
. = ALIGN(4096);
__initramfs_start = .;
.init.ramfs : { *(.init.ramfs) }
......@@ -127,8 +136,6 @@ SECTIONS
/* Sections to be discarded */
/DISCARD/ : {
*(.exit.data)
/* *(.exit.text) */
*(.exitcall.exit)
*(.eh_frame)
}
......
/*
* Zero a page.
* rdi page
......@@ -24,4 +23,28 @@ clear_page:
jnz .Lloop
nop
ret
clear_page_end:
/* C stepping K8 runs faster using the string instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
.section .altinstructions,"a"
.align 8
.quad clear_page
.quad clear_page_c
.byte X86_FEATURE_K8_C
.byte clear_page_end-clear_page
.byte clear_page_c_end-clear_page_c
.previous
.section .altinstr_replacement,"ax"
clear_page_c:
movl $4096/8,%ecx
xorl %eax,%eax
rep
stosq
ret
clear_page_c_end:
.previous
......@@ -89,3 +89,26 @@ copy_page:
movq 2*8(%rsp),%r13
addq $3*8,%rsp
ret
/* C stepping K8 runs faster using the string copy instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
.section .altinstructions,"a"
.align 8
.quad copy_page
.quad copy_page_c
.byte X86_FEATURE_K8_C
.byte copy_page_c_end-copy_page_c
.byte copy_page_c_end-copy_page_c
.previous
.section .altinstr_replacement,"ax"
copy_page_c:
movl $4096/8,%ecx
rep
movsq
ret
copy_page_c_end:
.previous
......@@ -9,6 +9,7 @@
#include <asm/current.h>
#include <asm/offset.h>
#include <asm/thread_info.h>
#include <asm/cpufeature.h>
/* Standard copy_to_user with segment limit checking */
.globl copy_to_user
......@@ -20,7 +21,23 @@ copy_to_user:
jc bad_to_user
cmpq threadinfo_addr_limit(%rax),%rcx
jae bad_to_user
jmp copy_user_generic
2:
.byte 0xe9 /* 32bit jump */
.long .Lcug-1f
1:
.section .altinstr_replacement,"ax"
3: .byte 0xe9 /* replacement jmp with 32bit immediate */
.long copy_user_generic_c-1b /* offset */
.previous
.section .altinstructions,"a"
.align 8
.quad 2b
.quad 3b
.byte X86_FEATURE_K8_C
.byte 5
.byte 5
.previous
/* Standard copy_from_user with segment limit checking */
.globl copy_from_user
......@@ -46,6 +63,7 @@ bad_to_user:
ret
.previous
/*
* copy_user_generic - memory copy with exception handling.
*
......@@ -60,9 +78,22 @@ bad_to_user:
.globl copy_user_generic
.p2align 4
copy_user_generic:
/* Put the first cacheline into cache. This should handle
the small movements in ioctls etc., but not penalize the bigger
filesystem data copies too much. */
.byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */
.byte 0x66,0x90
1:
.section .altinstr_replacement,"ax"
2: .byte 0xe9 /* near jump with 32bit immediate */
.long copy_user_generic_c-1b /* offset */
.previous
.section .altinstructions,"a"
.align 8
.quad copy_user_generic
.quad 2b
.byte X86_FEATURE_K8_C
.byte 5
.byte 5
.previous
.Lcug:
pushq %rbx
xorl %eax,%eax /*zero for the exception handler */
......@@ -232,3 +263,34 @@ copy_user_generic:
.Le_zero:
movq %rdx,%rax
jmp .Lende
/* C stepping K8 runs faster using the string copy instructions.
This is also a lot simpler. Use them when possible.
Patch in jmps to this code instead of copying it fully
to avoid unwanted aliasing in the exception tables. */
/* rdi destination
* rsi source
* rdx count
*
* Output:
* eax uncopied bytes or 0 if successful.
*/
copy_user_generic_c:
movl %edx,%ecx
shrl $3,%ecx
andl $7,%edx
1: rep
movsq
movl %edx,%ecx
2: rep
movsb
4: movl %ecx,%eax
ret
3: lea (%rdx,%rcx,8),%rax
ret
.section __ex_table,"a"
.quad 1b,3b
.quad 2b,4b
.previous
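Note on the exception-table fixups above: if the rep movsq faults, %rcx still holds the remaining quadword count and %rdx the tail-byte count, so lea (%rdx,%rcx,8),%rax yields the total of uncopied bytes. The same arithmetic as a C sketch (names illustrative):

#include <assert.h>
#include <stdint.h>

static uint64_t uncopied_bytes(uint64_t remaining_qwords, uint64_t tail_bytes)
{
	return tail_bytes + 8 * remaining_qwords;	/* lea (%rdx,%rcx,8),%rax */
}

int main(void)
{
	/* e.g. a fault with 5 qwords left in a copy that had 4 tail bytes */
	assert(uncopied_bytes(5, 4) == 44);
	return 0;
}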
/* Copyright 2002 Andi Kleen */
#include <asm/cpufeature.h>
/*
* memcpy - Copy a memory block.
*
......@@ -86,4 +87,35 @@ memcpy:
.Lende:
popq %rbx
ret
.Lfinal:
/* C stepping K8 runs faster using the string copy instructions.
It is also a lot simpler. Use this when possible */
.section .altinstructions,"a"
.align 8
.quad memcpy
.quad memcpy_c
.byte X86_FEATURE_K8_C
.byte .Lfinal-memcpy
.byte memcpy_c_end-memcpy_c
.previous
.section .altinstr_replacement,"ax"
/* rdi destination
* rsi source
* rdx count
*/
memcpy_c:
movq %rdi,%rax
movl %edx,%ecx
shrl $3,%ecx
andl $7,%edx
rep
movsq
movl %edx,%ecx
rep
movsb
ret
memcpy_c_end:
.previous
/* Copyright 2002 Andi Kleen, SuSE Labs */
/*
* ISO C memset - set a memory block to a byte value.
*
......@@ -85,3 +84,42 @@ __memset:
addq %r8,%rdi
subq %r8,%r11
jmp .Lafter_bad_alignment
/* C stepping K8 runs faster using the string instructions.
It is also a lot simpler. Use this when possible */
#include <asm/cpufeature.h>
.section .altinstructions,"a"
.align 8
.quad memset
.quad memset_c
.byte X86_FEATURE_K8_C
.byte memset_c_end-memset_c
.byte memset_c_end-memset_c
.previous
.section .altinstr_replacement,"ax"
/* rdi destination
* rsi value
* rdx count
*/
memset_c:
movq %rdi,%r9
movl %edx,%r8d
andl $7,%r8d
movl %edx,%ecx
shrl $3,%ecx
/* expand byte value */
movzbl %sil,%esi
movabs $0x0101010101010101,%rax
mul %esi /* with rax, clobbers rdx */
rep
stosq
movl %r8d,%ecx
rep
stosb
movq %r9,%rax
ret
memset_c_end:
.previous
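Note on memset_c above: the fill byte is replicated into all eight lanes of a register by multiplying it with 0x0101010101010101. The trick as a C sketch:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint8_t c = 0xab;
	/* multiplying a byte by 0x0101010101010101 copies it into all 8 lanes */
	uint64_t pattern = (uint64_t)c * 0x0101010101010101ULL;
	assert(pattern == 0xababababababababULL);
	return 0;
}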
......@@ -68,7 +68,7 @@ unsigned long __clear_user(void *addr, unsigned long size)
asm volatile(
" testq %[size8],%[size8]\n"
" jz 4f\n"
"0: movnti %[zero],(%[dst])\n"
"0: movq %[zero],(%[dst])\n"
" addq %[eight],%[dst]\n"
" decl %%ecx ; jnz 0b\n"
"4: movq %[size1],%%rcx\n"
......@@ -77,7 +77,7 @@ unsigned long __clear_user(void *addr, unsigned long size)
"1: movb %b[zero],(%[dst])\n"
" incq %[dst]\n"
" decl %%ecx ; jnz 1b\n"
"2: sfence\n"
"2:\n"
".section .fixup,\"ax\"\n"
"3: lea 0(%[size1],%[size8],8),%[size8]\n"
" jmp 2b\n"
......
......@@ -195,7 +195,7 @@ __asm__ __volatile__(LOCK "andl %0,%1" \
#define atomic_set_mask(mask, addr) \
__asm__ __volatile__(LOCK "orl %0,%1" \
: : "r" ((unsigned)mask),"m" (*addr) : "memory")
: : "r" ((unsigned)mask),"m" (*(addr)) : "memory")
/* Atomic operations are already serializing on x86 */
#define smp_mb__before_atomic_dec() barrier()
......
......@@ -270,7 +270,7 @@ static __inline__ int variable_test_bit(int nr, volatile const void * addr)
* Returns the bit-number of the first zero bit, not the number of the byte
* containing a bit.
*/
static __inline__ int find_first_zero_bit(void * addr, unsigned size)
static __inline__ int find_first_zero_bit(const unsigned long * addr, unsigned size)
{
int d0, d1, d2;
int res;
......@@ -299,7 +299,7 @@ static __inline__ int find_first_zero_bit(void * addr, unsigned size)
* @offset: The bitnumber to start searching at
* @size: The maximum size to search
*/
static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
static __inline__ int find_next_zero_bit (const unsigned long * addr, int size, int offset)
{
unsigned long * p = ((unsigned long *) addr) + (offset >> 6);
unsigned long set = 0;
......@@ -321,7 +321,7 @@ static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
/*
* No zero yet, search remaining full words for a zero
*/
res = find_first_zero_bit (p, size - 64 * (p - (unsigned long *) addr));
res = find_first_zero_bit ((const unsigned long *)p, size - 64 * (p - (unsigned long *) addr));
return (offset + set + res);
}
......@@ -334,7 +334,7 @@ static __inline__ int find_next_zero_bit (void * addr, int size, int offset)
* Returns the bit-number of the first set bit, not the number of the byte
* containing a bit.
*/
static __inline__ int find_first_bit(void * addr, unsigned size)
static __inline__ int find_first_bit(const unsigned long * addr, unsigned size)
{
int d0, d1;
int res;
......@@ -361,7 +361,7 @@ static __inline__ int find_first_bit(void * addr, unsigned size)
* @offset: The bitnumber to start searching at
* @size: The maximum size to search
*/
static __inline__ int find_next_bit(void * addr, int size, int offset)
static __inline__ int find_next_bit(const unsigned long * addr, int size, int offset)
{
unsigned int * p = ((unsigned int *) addr) + (offset >> 5);
int set = 0, bit = offset & 31, res;
......@@ -382,7 +382,7 @@ static __inline__ int find_next_bit(void * addr, int size, int offset)
/*
* No set bit yet, search remaining full words for a bit
*/
res = find_first_bit (p, size - 32 * (p - (unsigned int *) addr));
res = find_first_bit ((const unsigned long *)p, size - 32 * (p - (unsigned int *) addr));
return (offset + set + res);
}
......@@ -442,7 +442,7 @@ static __inline__ unsigned long __ffs(unsigned long word)
#ifdef __KERNEL__
static inline int sched_find_first_bit(unsigned long *b)
static inline int sched_find_first_bit(const unsigned long *b)
{
if (b[0])
return __ffs(b[0]);
......
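Note on the bitops hunks above: the pointer arithmetic relies on the usual split of a bit number into a word index and an in-word position — offset >> 6 and offset & 63 for the 64-bit words (>> 5 and & 31 on the 32-bit paths). A tiny sketch:

#include <stdio.h>

int main(void)
{
	unsigned offset = 200;	/* an arbitrary bit number */
	printf("word %u, bit %u\n", offset >> 6, offset & 63);
	return 0;		/* prints: word 3, bit 8 */
}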
......@@ -16,6 +16,8 @@
#include <asm/msr.h>
#include <asm/pda.h>
extern void alternative_instructions(void);
static void __init check_bugs(void)
{
identify_cpu(&boot_cpu_data);
......@@ -23,4 +25,5 @@ static void __init check_bugs(void)
printk("CPU: ");
print_cpu_info(&boot_cpu_data);
#endif
alternative_instructions();
}
......@@ -59,6 +59,7 @@
#define X86_FEATURE_K6_MTRR (3*32+ 1) /* AMD K6 nonstandard MTRRs */
#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
#define X86_FEATURE_K8_C (3*32+ 4) /* C stepping K8 */
#define cpu_has(c, bit) test_bit(bit, (c)->x86_capability)
#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability)
......
......@@ -277,6 +277,7 @@
#define __NR_ia32_fstatfs64 269
#define __NR_ia32_tgkill 270
#define __NR_ia32_utimes 271
#define __NR_ia32_fadvise64_64 272
#define IA32_NR_syscalls 275 /* must be > the biggest syscall! */
......
......@@ -11,16 +11,15 @@ struct x8664_pda {
struct task_struct *pcurrent; /* Current process */
unsigned long data_offset; /* Per cpu data offset from linker address */
struct x8664_pda *me; /* Pointer to itself */
unsigned long kernelstack; /* TOS for current process */
unsigned long kernelstack; /* top of kernel stack for current */
unsigned long oldrsp; /* user rsp for system call */
unsigned long irqrsp; /* Old rsp for interrupts. */
int irqcount; /* Irq nesting counter. Starts with -1 */
int cpunumber; /* Logical CPU number */
char *irqstackptr; /* top of irqstack */
unsigned long volatile *level4_pgt;
unsigned long volatile *level4_pgt; /* Per CPU top level page table */
unsigned int __softirq_pending;
unsigned int __nmi_count; /* arch dependent */
struct task_struct * __ksoftirqd_task; /* waitqueue is too large */
unsigned int __nmi_count; /* number of NMIs on this CPU */
struct mm_struct *active_mm;
int mmu_state;
unsigned apic_timer_irqs;
......
......@@ -380,4 +380,25 @@ static inline void prefetchw(void *x)
ti->task; \
})
#define ASM_NOP1 K8_NOP1
#define ASM_NOP2 K8_NOP2
#define ASM_NOP3 K8_NOP3
#define ASM_NOP4 K8_NOP4
#define ASM_NOP5 K8_NOP5
#define ASM_NOP6 K8_NOP6
#define ASM_NOP7 K8_NOP7
#define ASM_NOP8 K8_NOP8
/* Opteron nops */
#define K8_NOP1 ".byte 0x90\n"
#define K8_NOP2 ".byte 0x66,0x90\n"
#define K8_NOP3 ".byte 0x66,0x66,0x90\n"
#define K8_NOP4 ".byte 0x66,0x66,0x66,0x90\n"
#define K8_NOP5 K8_NOP3 K8_NOP2
#define K8_NOP6 K8_NOP3 K8_NOP3
#define K8_NOP7 K8_NOP4 K8_NOP3
#define K8_NOP8 K8_NOP4 K8_NOP4
#define ASM_NOP_MAX 8
#endif /* __ASM_X86_64_PROCESSOR_H */
......@@ -126,6 +126,17 @@ extern void load_gs_index(unsigned);
:"r" ((unsigned long) value))
#ifdef __KERNEL__
struct alt_instr {
__u8 *instr; /* original instruction */
__u8 *replacement;
__u8 cpuid; /* cpuid bit set for replacement */
__u8 instrlen; /* length of original instruction */
__u8 replacementlen; /* length of new instruction, <= instrlen */
__u8 pad[5];
};
#endif
/*
* Clear and set 'TS' bit respectively
*/
......
......@@ -60,7 +60,7 @@ struct thread_info {
static inline struct thread_info *current_thread_info(void)
{
struct thread_info *ti;
ti = (void *)read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE;
ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
return ti;
}
......
......@@ -256,12 +256,12 @@ static inline int __copy_from_user(void *dst, const void *src, unsigned size)
case 10:
__get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",16);
if (ret) return ret;
__get_user_asm(*(u16*)(8+dst),(u16*)(8+src),ret,"w","w","=r",2);
__get_user_asm(*(u16*)(8+(char*)dst),(u16*)(8+(char*)src),ret,"w","w","=r",2);
return ret;
case 16:
__get_user_asm(*(u64*)dst,(u64*)src,ret,"q","","=r",16);
if (ret) return ret;
__get_user_asm(*(u64*)(8+dst),(u64*)(8+src),ret,"q","","=r",8);
__get_user_asm(*(u64*)(8+(char*)dst),(u64*)(8+(char*)src),ret,"q","","=r",8);
return ret;
default:
return copy_user_generic(dst,src,size);
......
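Note on the __copy_from_user casts: arithmetic on void * is a GCC extension, so 8+dst only happens to work; offsetting through char * is the portable, warning-free spelling. A hypothetical standalone illustration:

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* hypothetical helper: read a u16 located 8 bytes into an untyped buffer */
static uint16_t tail16(const void *src)
{
	uint16_t v;
	memcpy(&v, 8 + (const char *)src, sizeof v);	/* well-defined ISO C */
	return v;
}

int main(void)
{
	unsigned char buf[16] = {0};
	buf[8] = 0x34; buf[9] = 0x12;
	assert(tail16(buf) == 0x1234);	/* little endian, as on x86-64 */
	return 0;
}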